diff --git a/Collagen_Bx2-4.ipynb b/Collagen_Bx2-4.ipynb
new file mode 100755
index 0000000..a30d41c
--- /dev/null
+++ b/Collagen_Bx2-4.ipynb
@@ -0,0 +1,506 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load libraries\n",
+    "\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import copy\n",
+    "import seaborn as sns\n",
+    "import importlib\n",
+    "import scipy\n",
+    "\n",
+    "import scanpy as sc\n",
+    "from sklearn.cluster import KMeans\n",
+    "from sklearn.preprocessing import scale, minmax_scale\n",
+    "from sklearn.metrics import silhouette_score\n",
+    "import matplotlib as mpl\n",
+    "mpl.rc('figure', max_open_warning = 0)\n",
+    "#mpl.font_manager._rebuild()\n",
+    "mpl.rcParams['mathtext.fontset'] = 'custom'\n",
+    "mpl.rcParams['mathtext.it'] = 'Arial:italic'\n",
+    "mpl.rcParams['mathtext.rm'] = 'Arial'\n",
+    "mpl.rcParams['font.sans-serif'] = \"Arial\"\n",
+    "mpl.rcParams['font.family'] = \"sans-serif\"\n",
+    "mpl.rc('font', serif='Arial') \n",
+    "codedir = os.getcwd()\n",
+    "#load cmif libraries\n",
+    "#os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF')\n",
+    "from mplex_image import visualize as viz, process, preprocess, normalize"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.chdir(codedir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(222)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Table of contents <a name=\"contents\"></a>\n",
+    "1. [Load Data](#load)\n",
+    "2. [Normalize](#norm)\n",
+    "3. [Visualize Normalization](#normviz)\n",
+    "4. [Leiden for cell typing](#clusterlei)\n",
+    "5. [Cluster K means](#cluster)\n",
+    "6. [Leiden cluster](#clust1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load data\n",
+    "os.chdir(f'{codedir}/paper_data')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s_date = '20210402'\n",
+    "if not os.path.exists(s_date):\n",
+    "    os.mkdir(s_date)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load Data <a name=\"load\"></a>\n",
+    "\n",
+    "2.\tAs Ki67 is not a continuous antigen, can you count positive cells (Proliferative cluster) by distance (<25, 25-50, 50-75, >75) from collagen I in each Bx?\n",
+    "\n",
+    "3.\tCould you map cells by distance (<25, 25-50, 50-75, >75) from collagen I in each Bx? If you can add a distance column (1-4) in the cluster csv, I can make it in Qi.\n",
+    "\n",
+    "4.\tCould you try to see the correlation between ER/PCNA and (VIM+aSMA+CD31)? – not necessary to show significance. (see attached image from Bx1 Scene-003)\n",
+    "\n",
+    "[contents](#contents)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### not normalized"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_mi = pd.read_csv('20210324_SMTBx1-4_JE-TMA-43_60_62_FilteredMeanIntensity.csv', index_col=0)  # first CSV column becomes the index\n",
+    "df_mi['slide'] = df_mi.index.map(lambda s_cell: s_cell.split('_')[0])  # index text before the first underscore\n",
+    "df_mi['slide_scene'] = df_mi.index.map(lambda s_cell: s_cell.split('_cell')[0])  # index text before '_cell'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print the mask-distance feature files present in the working directory\n",
+    "for s_file in os.listdir():\n",
+    "    if 'MaskDistances' in s_file:\n",
+    "        print(s_file)\n",
+    "# DataFrame.append was removed in pandas 2.0 (and re-copies the frame on every call): read each sample, concatenate once\n",
+    "df_mask = pd.concat([pd.read_csv(f'features_{s_sample}_MaskDistances.csv',index_col=0) for s_sample in ['SMT101Bx1-16','SMTBx2-5','SMTBx3','SMTBx4-3','HTA-33']])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_mask.columns\n",
+    "ls_target = ['Vim_dist','CD31_dist', 'PDPN_dist',  'aSMA_dist', 'CD68_dist','ColI_dist', 'ColIV_dist']\n",
+    "ls_marker =  ['ER_nuclei','Ki67_nuclei','PCNA_nuclei']\n",
+    "ls_drop = ['HTA-33_scene001','SMTBx1-16_scene001'#,'SMT101Bx4-3_scene001','SMT101Bx4-3_scene002'\n",
+    "          ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df_mi.merge(df_mask.loc[:,ls_target],left_index=True,right_index=True)  # index-on-index merge of intensities with the ls_target distance columns\n",
+    "df = df[(~df.Vim_dist.isna()) & (~df.slide_scene.isin(ls_drop))]  # keep rows that have a Vim distance and whose slide_scene is not in ls_drop\n",
+    "df.loc[:,ls_target] = df.loc[:,ls_target]*.325  # NOTE(review): .325 is presumably a pixel-to-micron factor (later legends are titled micrometres) - confirm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "#fit\n",
+    "data = df.loc[:,ls_marker].T\n",
+    "batch = df.slide\n",
+    "bayesdata = normalize.combat(data, batch)\n",
+    "df_norm = bayesdata.T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_norm['slide'] = df.slide\n",
+    "# per-slide mean and std of the normalize.combat output; one agg call so both are displayed (only a cell's last expression renders)\n",
+    "df_norm.groupby('slide').agg(['mean','std'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['Vim-CD31-aSMA_dist'] = df.loc[:,['Vim_dist','CD31_dist','aSMA_dist']].min(axis=1)  # row-wise minimum of the Vim, CD31 and aSMA distances\n",
+    "if 'Vim-CD31-aSMA_dist' not in ls_target: ls_target = ls_target + ['Vim-CD31-aSMA_dist']  # guarded so re-running the cell does not append a duplicate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "mpl.rcParams['ps.fonttype'] = 42\n",
+    "%matplotlib inline\n",
+    "#by tissue no Bx1: KDE of marker intensity per i_diff-wide distance bin; one row per slide, one column per marker\n",
+    "sns.set(style='white')\n",
+    "import matplotlib.ticker as tic\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "tot = 0\n",
+    "ls_dist = [25, 50, 75]\n",
+    "i_diff = 25\n",
+    "ls_slide = ['SMTBx2-5', 'SMTBx3','SMTBx4-3'] # was 'SMT1Bx4-3', which is not a key of d_slide (KeyError on the row label)\n",
+    "d_slide = {'SMTBx1-16':'Bx1', 'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3','HTA-33':'Bx4-HTAN','SMTBx4-3':'Bx4'}\n",
+    "for s_target in ['ColI_dist', 'ColIV_dist','Vim-CD31-aSMA_dist']:\n",
+    "    print(s_target)\n",
+    "    fig, ax = plt.subplots(3,2, figsize=(4.5,4),sharex=True,dpi=300)\n",
+    "    for idxc, s_slide in enumerate(ls_slide):\n",
+    "        print(s_slide)\n",
+    "        df_slide = df[df.slide==s_slide]\n",
+    "        for idx, s_marker in enumerate(['ER_nuclei', 'PCNA_nuclei']): #,'Ki67_nuclei']):\n",
+    "            print(s_marker)\n",
+    "            df_result = pd.DataFrame(index=df_slide.index)\n",
+    "            for s_dist in ls_dist:\n",
+    "                b_bool = (df_slide.loc[:,s_target] < s_dist) & (df_slide.loc[:,s_target] >= s_dist - i_diff)\n",
+    "                df_result.loc[b_bool,f'{s_marker}_{s_dist}'] = df_slide.loc[b_bool,s_marker]\n",
+    "            for  s_col in df_result.columns:\n",
+    "                sns.kdeplot(df_result.loc[:,s_col].dropna(), ax=ax[idxc,idx],\n",
+    "                            label=f\"< {s_col.split('_')[2]}\"#,fill=True, alpha=0.3\n",
+    "                           )\n",
+    "            if df_result.mean().fillna(0).iloc[2] == 0: # third bin empty (NaN mean) or zero-mean: ANOVA on the first two bins only\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna())\n",
+    "                print(len(df_result.iloc[:,0].dropna()))\n",
+    "                print(len(df_result.iloc[:,1].dropna()))\n",
+    "            else:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna(),df_result.iloc[:,2].dropna())\n",
+    "                print(len(df_result.iloc[:,0].dropna()))\n",
+    "                print(len(df_result.iloc[:,1].dropna()))\n",
+    "                print('over75')  # label kept as-is; the count printed next is the third bin, i.e. [50, 75)\n",
+    "                print(len(df_result.iloc[:,2].dropna()))\n",
+    "            ax[idxc,idx].set_xlabel(f\"{s_col.split('_')[0]} Intensity\",fontname=\"Arial\",fontsize=18)\n",
+    "            ax[idxc,idx].set_ylabel(f\"\")\n",
+    "            ax[idxc,idx].set_title(f\"\")\n",
+    "            temp = tic.MaxNLocator(3)\n",
+    "            ax[idxc,idx].set_yticklabels(())\n",
+    "            ax[idxc,idx].xaxis.set_major_locator(temp)\n",
+    "            tot+=1\n",
+    "            if pvalue < 0.001: # star marks significant panels; 0.001 is stricter than a Bonferroni-corrected 0.05/30\n",
+    "                ax[idxc,idx].text(0.42, 0.87, '*',\n",
+    "                     horizontalalignment='center',\n",
+    "                     verticalalignment='center',\n",
+    "                     transform=ax[idxc,idx].transAxes)\n",
+    "            ax[idxc,idx].set_xlim(-1000,5500)\n",
+    "            ax[idxc,idx].spines['right'].set_visible(False)\n",
+    "            ax[idxc,idx].spines['left'].set_visible(False)\n",
+    "            ax[idxc,idx].spines['top'].set_visible(False)\n",
+    "            #print(ax[idxc,idx].get_xticklabels())\n",
+    "            #ax[idxc,idx].set_xticklabels(ax[idxc,idx].get_xticklabels(),{'fontsize':16})\n",
+    "        ax[idxc,0].set_ylabel(f\"{d_slide[s_slide]}\",fontname=\"Arial\",fontsize=18)\n",
+    "    ax[2,1].legend(title=r'$\\mu$m',borderpad=.3,labelspacing=.3,loc=4,fontsize=14)\n",
+    "    plt.subplots_adjust(wspace=.001,hspace=.001)\n",
+    "    plt.suptitle(f\"Distance to {s_target.split('_')[0]}\",y=.93,fontname=\"Arial\",fontsize=24)\n",
+    "    plt.tight_layout()\n",
+    "    fig.savefig(f'./{s_date}/IntensityvsDistance_{i_diff}s_{s_target}_by_slide_noBx1.png',dpi=300)\n",
+    "    #fig.savefig(f'./{s_date}/IntensityvsDistance_{i_diff}s_{s_target}_by_slide_noBx1.pdf',dpi=200)\n",
+    "    #break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    " 0.05/30"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import gridspec  # same KDE panels, laid out with GridSpec so the legend can sit right of the grid\n",
+    "ax_objs = []  # NOTE: this cell relies on tic, ls_dist, i_diff and tot defined by the preceding plotting cell\n",
+    "ls_slide = ['SMTBx2-5', 'SMTBx3','SMTBx4-3'] # was 'SMT1Bx4-3', which is not a key of d_slide (KeyError on the row label)\n",
+    "d_slide = {'SMTBx1-16':'Bx1', 'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3','HTA-33':'Bx4-HTAN','SMTBx4-3':'Bx4'}\n",
+    "for s_target in ['ColI_dist', 'ColIV_dist','Vim-CD31-aSMA_dist']:\n",
+    "    fig = plt.figure(figsize=(5.5,3.5),dpi=300)\n",
+    "    gs = gridspec.GridSpec(nrows=3,  ncols=2,figure=fig, \n",
+    "                       wspace=0.1, hspace=0.05,left=0.1, right=.75\n",
+    "                      )\n",
+    "    for idxc, s_slide in enumerate(ls_slide):\n",
+    "        df_slide = df[df.slide==s_slide]\n",
+    "        for idx, s_marker in enumerate(['ER_nuclei', 'PCNA_nuclei']):\n",
+    "            ax_objs.append(fig.add_subplot(gs[idxc,idx]))\n",
+    "            df_result = pd.DataFrame(index=df_slide.index)\n",
+    "            for s_dist in ls_dist:\n",
+    "                b_bool = (df_slide.loc[:,s_target] < s_dist) & (df_slide.loc[:,s_target] >= s_dist - i_diff)\n",
+    "                df_result.loc[b_bool,f'{s_marker}_{s_dist}'] = df_slide.loc[b_bool,s_marker]\n",
+    "            for  s_col in df_result.columns:\n",
+    "                g =sns.kdeplot(df_result.loc[:,s_col].dropna(), ax=ax_objs[-1],\n",
+    "                               label=f\"< {s_col.split('_')[2]}\"#,fill=True,alpha=0.5\n",
+    "                              )\n",
+    "            if df_result.mean().fillna(0).iloc[2] == 0:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna())\n",
+    "                # third bin empty (NaN mean) or zero-mean: ANOVA on the first two bins only\n",
+    "            else:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna(),df_result.iloc[:,2].dropna())\n",
+    "            ax_objs[-1].set_ylabel(f\"\")\n",
+    "            ax_objs[-1].set_title(f\"\")\n",
+    "            temp = tic.MaxNLocator(3)\n",
+    "            ax_objs[-1].set_yticklabels(())\n",
+    "            ax_objs[-1].xaxis.set_major_locator(temp)\n",
+    "            tot+=1\n",
+    "            if pvalue < 0.001: # star marks significant panels; 0.001 is stricter than a Bonferroni-corrected 0.05/30\n",
+    "                ax_objs[-1].text(0.55, 0.65, '*',\n",
+    "                     horizontalalignment='center',\n",
+    "                     verticalalignment='center',\n",
+    "                     transform=ax_objs[-1].transAxes)\n",
+    "            ax_objs[-1].set_xlim(-1000,5500)\n",
+    "            ax_objs[-1].spines['right'].set_visible(False)\n",
+    "            ax_objs[-1].spines['left'].set_visible(False)\n",
+    "            ax_objs[-1].spines['top'].set_visible(False)\n",
+    "            #ax_objs[-1].spines['bottom'].set_visible(False)\n",
+    "            ax_objs[-1].set_xlabel('')\n",
+    "            rect = ax_objs[-1].patch\n",
+    "            rect.set_alpha(0)\n",
+    "            if idx == 0:\n",
+    "                ax_objs[-1].set_ylabel(f\"{d_slide[s_slide]}\",fontsize=18)\n",
+    "            if idx==1:\n",
+    "                if idxc == 2:\n",
+    "                    ax_objs[-1].legend(title=r'$\\mu$m',borderpad=.3,labelspacing=.3,fontsize=12,loc='upper left', bbox_to_anchor=(1.05, 1.5))\n",
+    "            if idxc ==2:\n",
+    "                ax_objs[-1].set_xlabel(f\"{s_col.split('_')[0]} Intensity\",fontsize=18)\n",
+    "            else:\n",
+    "                ax_objs[-1].set_xticklabels([])\n",
+    "    plt.suptitle(f\"Distance to {s_target.split('_')[0]}\",x=.45,y=.95,fontsize=20)\n",
+    "    gs.update(bottom = 0.2)\n",
+    "    fig.savefig(f'./{s_date}/IntensityvsDistance_{i_diff}s_{s_target}_by_slide_noBx1_bigger.png',dpi=200)\n",
+    "    #break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#by tissue w bx1: KDE of each ls_marker intensity per i_diff-wide distance bin, for every distance target\n",
+    "%matplotlib inline\n",
+    "sns.set(style='white')\n",
+    "import matplotlib.ticker as tic\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "tot = 0\n",
+    "ls_dist = [25, 50, 75]\n",
+    "i_diff = 25\n",
+    "ls_slide = ['SMTBx1-16','SMTBx2-5', 'SMTBx3','SMTBx4-3'] # was 'SMT1Bx4-3', which is not a key of d_slide (KeyError on the row label)\n",
+    "d_slide = {'SMTBx1-16':'Bx1', 'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3','HTA-33':'Bx4-HTAN','SMTBx4-3':'Bx4'}\n",
+    "for s_target in list(dict.fromkeys(ls_target + ['Vim-CD31-aSMA_dist'])): # de-duplicated: ls_target may already hold the combined target\n",
+    "    fig, ax = plt.subplots(4,3, figsize=(7,5),sharex=True,dpi=300)\n",
+    "    for idxc, s_slide in enumerate(ls_slide):\n",
+    "        df_slide = df[df.slide==s_slide]\n",
+    "        for idx, s_marker in enumerate(ls_marker):\n",
+    "            df_result = pd.DataFrame(index=df_slide.index)\n",
+    "            for s_dist in ls_dist:\n",
+    "                b_bool = (df_slide.loc[:,s_target] < s_dist) & (df_slide.loc[:,s_target] >= s_dist - i_diff)\n",
+    "                df_result.loc[b_bool,f'{s_marker}_{s_dist}'] = df_slide.loc[b_bool,s_marker]\n",
+    "            for  s_col in df_result.columns:\n",
+    "                sns.kdeplot(df_result.loc[:,s_col].dropna(), ax=ax[idxc,idx], label=f\"< {s_col.split('_')[2]}\")\n",
+    "            if df_result.mean().fillna(0).iloc[2] == 0:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna())\n",
+    "                # third bin empty (NaN mean) or zero-mean: ANOVA on the first two bins only\n",
+    "            else:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna(),df_result.iloc[:,2].dropna())\n",
+    "            ax[idxc,idx].set_xlabel(f\"{s_col.split('_')[0]} Intensity\",fontsize=18)\n",
+    "            ax[idxc,idx].set_ylabel(f\"\")\n",
+    "            ax[idxc,idx].set_title(f\"\")\n",
+    "            temp = tic.MaxNLocator(3)\n",
+    "            ax[idxc,idx].set_yticklabels(())\n",
+    "            ax[idxc,idx].xaxis.set_major_locator(temp)\n",
+    "            tot+=1\n",
+    "            if pvalue < 0.001: # star marks significant panels; 0.001 is stricter than a Bonferroni-corrected 0.05/30\n",
+    "                ax[idxc,idx].text(0.5, 0.8, '*',\n",
+    "                     horizontalalignment='center',\n",
+    "                     verticalalignment='center',\n",
+    "                     transform=ax[idxc,idx].transAxes)\n",
+    "            ax[idxc,idx].set_xlim(-1500,7000)\n",
+    "            ax[idxc,idx].spines['right'].set_visible(False)\n",
+    "            ax[idxc,idx].spines['left'].set_visible(False)\n",
+    "            ax[idxc,idx].spines['top'].set_visible(False)\n",
+    "        ax[idxc,0].set_ylabel(f\"{d_slide[s_slide]}\",fontsize=18)\n",
+    "    ax[0,2].legend(title=r'$\\mu$m')\n",
+    "    plt.subplots_adjust(wspace=.001,hspace=.001)\n",
+    "    plt.suptitle(f\"Distance to {s_target.split('_')[0]}\",fontsize=20)\n",
+    "    plt.tight_layout()\n",
+    "    fig.savefig(f'./{s_date}/IntensityvsDistance_25s_{s_target}_by_slide.png',dpi=300)\n",
+    "    #break"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#by tissue without Bx1, all ls_marker columns (saved under its own file name so the 'w bx1' figures are not overwritten)\n",
+    "%matplotlib inline\n",
+    "sns.set(style='white')\n",
+    "import matplotlib.ticker as tic\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "tot = 0\n",
+    "ls_dist = [25, 50, 75]\n",
+    "i_diff = 25\n",
+    "ls_slide = ['SMTBx2-5', 'SMTBx3','SMTBx4-3'] # was 'SMT1Bx4-3', which is not a key of d_slide (KeyError on the row label)\n",
+    "d_slide = {'SMTBx1-16':'Bx1', 'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3','HTA-33':'Bx4-HTAN','SMTBx4-3':'Bx4'}\n",
+    "for s_target in ['ColI_dist', 'ColIV_dist']:\n",
+    "    fig, ax = plt.subplots(3,3, figsize=(7,4),sharex=True)\n",
+    "    for idxc, s_slide in enumerate(ls_slide):\n",
+    "        df_slide = df[df.slide==s_slide]\n",
+    "        for idx, s_marker in enumerate(ls_marker):\n",
+    "            df_result = pd.DataFrame(index=df_slide.index)\n",
+    "            for s_dist in ls_dist:\n",
+    "                b_bool = (df_slide.loc[:,s_target] < s_dist) & (df_slide.loc[:,s_target] >= s_dist - i_diff)\n",
+    "                df_result.loc[b_bool,f'{s_marker}_{s_dist}'] = df_slide.loc[b_bool,s_marker]\n",
+    "            for  s_col in df_result.columns:\n",
+    "                sns.kdeplot(df_result.loc[:,s_col].dropna(), ax=ax[idxc,idx], label=f\"< {s_col.split('_')[2]}\")\n",
+    "            if df_result.mean().fillna(0).iloc[2] == 0:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna())\n",
+    "                # third bin empty (NaN mean) or zero-mean: ANOVA on the first two bins only\n",
+    "            else:\n",
+    "                statistic, pvalue = scipy.stats.f_oneway(df_result.iloc[:,0].dropna(),df_result.iloc[:,1].dropna(),df_result.iloc[:,2].dropna())\n",
+    "            ax[idxc,idx].set_xlabel(f\"{s_col.split('_')[0]} Intensity\")\n",
+    "            ax[idxc,idx].set_ylabel(f\"\")\n",
+    "            ax[idxc,idx].set_title(f\"\")\n",
+    "            temp = tic.MaxNLocator(3)\n",
+    "            ax[idxc,idx].set_yticklabels(())\n",
+    "            ax[idxc,idx].xaxis.set_major_locator(temp)\n",
+    "            tot+=1\n",
+    "            if pvalue < 0.001: # star marks significant panels; 0.001 is stricter than a Bonferroni-corrected 0.05/30\n",
+    "                ax[idxc,idx].text(0.5, 0.8, '*',\n",
+    "                     horizontalalignment='center',\n",
+    "                     verticalalignment='center',\n",
+    "                     transform=ax[idxc,idx].transAxes)\n",
+    "            ax[idxc,idx].set_xlim(-1500,7000)\n",
+    "            ax[idxc,idx].spines['right'].set_visible(False)\n",
+    "            ax[idxc,idx].spines['left'].set_visible(False)\n",
+    "            ax[idxc,idx].spines['top'].set_visible(False)\n",
+    "        ax[idxc,0].set_ylabel(f\"{d_slide[s_slide]}\")\n",
+    "    ax[0,2].legend(title=r'$\\mu$m')\n",
+    "    plt.subplots_adjust(wspace=.001,hspace=.001)\n",
+    "    plt.suptitle(f\"Distance to {s_target.split('_')[0]}\")\n",
+    "    plt.tight_layout()\n",
+    "    fig.savefig(f'./{s_date}/IntensityvsDistance_25s_{s_target}_by_slide_noBx1_3markers.png',dpi=200)\n",
+    "    #break"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python3.9.5",
+   "language": "python",
+   "name": "python3.9.5"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/GateCellTypes.ipynb b/GateCellTypes.ipynb
new file mode 100755
index 0000000..bba7702
--- /dev/null
+++ b/GateCellTypes.ipynb
@@ -0,0 +1,573 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load libraries\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import copy\n",
+    "import seaborn as sns\n",
+    "import importlib\n",
+    "from matplotlib import cm\n",
+    "import matplotlib as mpl\n",
+    "mpl.rc('figure', max_open_warning = 0)\n",
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "mpl.rcParams['ps.fonttype'] = 42\n",
+    "mpl.rcParams['mathtext.fontset'] = 'custom'\n",
+    "mpl.rcParams['mathtext.it'] = 'Arial:italic'\n",
+    "mpl.rcParams['mathtext.rm'] = 'Arial'\n",
+    "codedir = os.getcwd()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load cmif libraries\n",
+    "#os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF')\n",
+    "from mplex_image import visualize as viz, process, preprocess, gating"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.chdir(codedir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notes\n",
+    "\n",
+    "use CD45 to gate immune (CD3 more artifact)\n",
+    "\n",
+    "update 20200402: add SMT-Bx2-5 and HTA-33, simplified gating."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#set location of files\n",
+    "#load data\n",
+    "rootdir = f'{codedir}/paper_data'\n",
+    "# go to location of files\n",
+    "os.chdir(rootdir)\n",
+    "preprocess.cmif_mkdir(['GatingPlots'])\n",
+    "#os.listdir()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3 define samples to work with/ image combos\n",
+    "ls_sample = ['20210402_SMT']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# DataFrame.append was removed in pandas 2.0: read each sample's ManualPositive table, then concatenate once\n",
+    "ls_frames = [pd.read_csv(f'{s_sample}_ManualPositive.csv',index_col=0) for s_sample in ls_sample]\n",
+    "df_data = pd.concat(ls_frames) if ls_frames else pd.DataFrame()  # an empty ls_sample still yields an empty frame, as before"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d_rename = {'CD4':'CD4_Ring','CD8':'CD8_Ring',\n",
+    "           #'HER2':'HER2_Ring','ER':'ER_Nuclei'\n",
+    "           }\n",
+    "df_data = df_data.rename(d_rename, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Specify Gating Strategy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#parameters\n",
+    "\n",
+    "# cell types\n",
+    "ls_endothelial = ['CD31']\n",
+    "ls_immune = ['CD45','CD68'] \n",
+    "ls_tumor = ['CK7','CK19','Ecad'] \n",
+    "ls_prolif = ['Ki67']\n",
+    "\n",
+    "#tcell/myeloid\n",
+    "s_tcell = 'CD45' \n",
+    "s_bcell = 'CD20'\n",
+    "s_myeloid = 'CD68'\n",
+    "ls_immune_functional = ['PD1','CD44','prolif'] # not in dataset: 'FoxP3_Nuclei','GRNZB_Nuclei',\n",
+    "\n",
+    "#luminal/basal/mesenchymal\n",
+    "ls_luminal = ['CK19','CK7'] # not in dataset 'CK8_Ring'\n",
+    "ls_basal = ['CK5','CK14'] \n",
+    "ls_mes = ['CD44', 'Vim'] \n",
+    "ls_tumor_plus = ['Ecad'] + ['Lum']\n",
+    "ls_stromal_function =  ['Vim','aSMA','PDPN']\n",
+    "ls_tumor_prolif = ['PCNA','Ki67','pHH3']  \n",
+    "\n",
+    "#index of cell line samples (i.e. 100% tumor)\n",
+    "ls_cellline_index = []\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#custom gating\n",
+    "df_data = gating.main_celltypes(df_data,ls_endothelial,ls_immune,ls_tumor,ls_cellline_index)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#add normal liver: rows flagged tumor and Ecad with no ls_luminal marker set get celltype 'epithelial'\n",
+    "df_data.loc[(~df_data.loc[:,ls_luminal].any(axis=1) & df_data.loc[:,'Ecad'] & df_data.loc[:,'tumor']),'celltype'] = 'epithelial'\n",
+    "df_data.loc[df_data.celltype == 'epithelial','tumor'] = False  # relabelled rows no longer count as tumor\n",
+    "df_data.loc[df_data.celltype == 'epithelial','epithelial'] = True  # the new 'epithelial' flag column mirrors the celltype label\n",
+    "df_data.loc[df_data.celltype != 'epithelial','epithelial'] = False\n",
+    "df_data.epithelial = df_data.epithelial.astype('bool')  # cast the flag column to bool dtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "importlib.reload(gating)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Perform Gating"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "#simple gating\n",
+    "df_data = gating.proliferation(df_data,ls_prolif)\n",
+    "df_data = gating.immune_types(df_data,s_myeloid,s_bcell,s_tcell)\n",
+    "df_data = gating.cell_prolif(df_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "#custom gating (skip): the calls below are disabled by wrapping them in a string literal\n",
+    "'''\n",
+    "df_data = gating.immune_functional(df_data,ls_immune_functional)\n",
+    "df_data = gating.diff_hr_state(df_data,ls_luminal,ls_basal,ls_mes)\n",
+    "df_data = gating.celltype_gates(df_data,ls_tumor_prolif,s_new_name='TumorProlif',s_celltype='tumor')\n",
+    "#df_data = gating.celltype_gates(df_data,ls_tumor_plus,s_new_name='TumorDiffPlus',s_celltype='tumor')\n",
+    "df_data = gating.celltype_gates(df_data,ls_stromal_function,s_new_name='StromalType',s_celltype='stromal')\n",
+    "'''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_data = gating.non_tumor(df_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Output Gating Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#check\n",
+    "ls_drop = ['ColI', 'ColIV', 'CD20', 'CD3', 'CD44', 'CK14',\n",
+    "       'CK5', 'ER', 'HER2', 'LamAC', 'PCNA', 'PD1', 'pHH3']\n",
+    "df_data.loc[:,df_data.dtypes=='object'].drop(ls_drop,axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#drop extra columns: keep the non-bool columns, minus those listed in ls_drop\n",
+    "df_gate = df_data.loc[:,df_data.dtypes!='bool'].drop(ls_drop,axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#handcrafted stromal populations (skip)\n",
+    "'''\n",
+    "d_rename_stroma = {'stromal_Vim_aSMA':'myofibroblast', 'stromal_aSMA':'myofibroblast', 'stromal___':'stromal', 'stromal_Vim':'fibroblast',\n",
+    "       'stromal_PDPN_Vim_aSMA':'myofibroblast', 'stromal_PDPN_Vim':'fibroblast', 'stromal_PDPN':'lymphatic',\n",
+    "       'stromal_PDPN_aSMA':'myofibroblast'}\n",
+    "df_gate.NonTumor = df_gate.NonTumor.replace(d_rename_stroma)\n",
+    "df_gate['FinalCell'] = df_gate.NonTumor.fillna(df_gate.CellProlif).fillna(df_gate.celltype)\n",
+    "df_gate.FinalCell = df_gate.FinalCell.replace({'tumor_nonprolif':'tumor','liver_nonprolif':'liver','liver_prolif':'liver'})\n",
+    "'''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_gate.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s_out = '20210402_SMT'\n",
+    "if not os.path.exists(f'{s_out}_GatedPositiveCellNames.csv'):\n",
+    "    print('saving new csv')\n",
+    "    df_gate.to_csv(f'{s_out}_GatedPositiveCellNames.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#importlib.reload(viz)\n",
+    "s_out = '20210402_SMT'\n",
+    "f'{s_out}_GatedPositiveCellNames.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_data = pd.read_csv(f'{s_out}_GatedPositiveCellNames.csv',index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#df_data['Stromal'] = df_data.StromalType.replace(d_rename_stroma)\n",
+    "#df_data['NonTumor'] = df_data.NonTumor.replace(d_rename_stroma)\n",
+    "#df_data['NonTumorFunc'] = df_data.NonTumorFunc.replace(d_rename_stroma)\n",
+    "#handcrafted stromal populations\n",
+    "#d_rename_stroma = {'stromal_Vim_aSMA':'myofibroblast', 'stromal_aSMA':'myofibroblast', 'stromal___':'stromal', 'stromal_Vim':'fibroblast',\n",
+    "#       'stromal_PDPN_Vim_aSMA':'myofibroblast', 'stromal_PDPN_Vim':'fibroblast', 'stromal_PDPN':'lymphatic',\n",
+    "#       'stromal_PDPN_aSMA':'myofibroblast'}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(df_data.columns == 'FinalCell').any()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#combined cell type (run once)\n",
+    "# FinalCell is only built when the column is absent, so re-running this cell is a no-op.\n",
+    "if 'FinalCell' not in df_data.columns:\n",
+    "    b_tumor = df_data.celltype == 'tumor'\n",
+    "    b_immune = df_data.celltype == 'immune'\n",
+    "    # tumor cells take their CellProlif label; all other cells start from celltype\n",
+    "    df_data.loc[b_tumor,'FinalCell'] = df_data.loc[b_tumor,'CellProlif']\n",
+    "    df_data.loc[~b_tumor,'FinalCell'] = df_data.loc[~b_tumor,'celltype']\n",
+    "    # immune cells are then overwritten with their ImmuneType label\n",
+    "    df_data.loc[b_immune,'FinalCell'] = df_data.loc[b_immune,'ImmuneType']\n",
+    "\n",
+    "#df_data.FinalCell.unique()\n",
+    "#df_data.to_csv(f'{s_out}_GatedPositiveCellNames.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_drop = df_data.loc[((df_data.index.str.contains('HTA')) & (df_data.FinalCell=='epithelial'))].index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get rid epithelial\n",
+    "# except HTAN\n",
+    "df_data['FinalCell'] = df_data.FinalCell.replace({'epithelial':'stromal'})\n",
+    "df_data = df_data.drop(ls_drop)\n",
+    "df_data['countme'] = True\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "# Per-scene composition plots: a clustermap and a stacked bar chart are saved\n",
+    "# under ./GatingPlots for the first categorical column of df_data.\n",
+    "s_grouper='slide_scene'\n",
+    "# savefig does not create missing folders\n",
+    "os.makedirs('./GatingPlots', exist_ok=True)\n",
+    "# `cm` is not imported by this notebook's import cell; get tab10 through pyplot instead\n",
+    "tu_tab10 = plt.get_cmap('tab10').colors\n",
+    "\n",
+    "#calculate proportions\n",
+    "for s_cell in df_data.columns[(df_data.dtypes=='object') & ~(df_data.columns.isin([s_grouper]))].tolist():\n",
+    "    df_prop = viz.prop_positive(df_data,s_cell=s_cell,s_grouper=s_grouper)\n",
+    "    # make annotations: one colour per row of df_prop, cycling when there are more rows than colours\n",
+    "    df_annot=pd.DataFrame(data={'ID': df_prop.index.tolist()},index=df_prop.index)\n",
+    "    lut = {s_id: tu_tab10[i_id % len(tu_tab10)] for i_id, s_id in enumerate(sorted(df_annot.ID.unique()))}\n",
+    "    g, df_plot_less = viz.prop_clustermap(df_prop,df_annot,i_thresh =.01,lut=lut)\n",
+    "    g.savefig(f'./GatingPlots/{s_cell}_clustermap.png',dpi=150)\n",
+    "    plt.close()\n",
+    "    fig = viz.prop_barplot(df_plot_less,s_cell,colormap=\"Spectral\")\n",
+    "    fig.savefig(f'./GatingPlots/{s_cell}_bar.png',dpi=200)\n",
+    "    # only the first categorical column is plotted; remove the break to plot all of them\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#group by tissue\n",
+    "# Flag the scenes to exclude BEFORE slide_scene is collapsed to the slide name:\n",
+    "# afterwards the column holds e.g. 'HTA-33', so isin([... '_scene001']) would match nothing.\n",
+    "# NOTE: not idempotent - on a second run slide_scene is already collapsed; re-run from the cell that loads df_data.\n",
+    "b_exclude = df_data.slide_scene.isin(['HTA-33_scene001','SMTBx1-16_scene001'])\n",
+    "df_data['slide_scene'] = [item.split('_')[0] for item in df_data.slide_scene]\n",
+    "df_data_select = df_data.loc[~b_exclude,:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#by tissue\n",
+    "# Per-tissue composition plots from df_data_select: the proportions table is written to csv,\n",
+    "# then a clustermap and a stacked bar chart are saved as PDFs under ./GatingPlots.\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "s_grouper='slide_scene'\n",
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "mpl.rcParams['ps.fonttype'] = 42\n",
+    "# savefig does not create missing folders\n",
+    "os.makedirs('./GatingPlots', exist_ok=True)\n",
+    "# `cm` is not imported by this notebook's import cell; get tab10 through pyplot instead\n",
+    "tu_tab10 = plt.get_cmap('tab10').colors\n",
+    "\n",
+    "#calculate proportions\n",
+    "for s_cell in df_data.columns[(df_data.dtypes=='object') & ~(df_data.columns.isin([s_grouper]))].tolist():\n",
+    "    df_prop = viz.prop_positive(df_data_select,s_cell=s_cell,s_grouper=s_grouper)\n",
+    "    df_prop.to_csv(f'ManualGating_SMT_proportions_{s_cell}.csv')\n",
+    "    # make annotations: one colour per row of df_prop, cycling when there are more rows than colours\n",
+    "    df_annot=pd.DataFrame(data={'ID': df_prop.index.tolist()},index=df_prop.index)\n",
+    "    lut = {s_id: tu_tab10[i_id % len(tu_tab10)] for i_id, s_id in enumerate(sorted(df_annot.ID.unique()))}\n",
+    "    g, df_plot_less = viz.prop_clustermap(df_prop,df_annot,i_thresh =.001,lut=lut)\n",
+    "    g.savefig(f'./GatingPlots/{s_cell}_clustermap_tissue.pdf',dpi=150)\n",
+    "    plt.close()\n",
+    "    # pick a palette by the number of columns left to plot\n",
+    "    if df_plot_less.shape[1] < 8:\n",
+    "        cmap = \"Spectral\"\n",
+    "    elif df_plot_less.shape[1] < 11:\n",
+    "        cmap = \"Paired\"\n",
+    "    else:\n",
+    "        cmap = \"tab20\"\n",
+    "    fig = viz.prop_barplot(df_plot_less,s_cell,colormap=cmap)\n",
+    "    fig.savefig(f'./GatingPlots/{s_cell}_bar_tissue.pdf',dpi=200)\n",
+    "    # only the first categorical column is plotted; remove the break to plot all of them\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s_date = '20210402'\n",
+    "# Lower (DAPI_X, DAPI_Y) corner of the square region kept for each scene.\n",
+    "# NOTE(review): the ROI request recorded in Normalize_Bx2-4.ipynb lists Bx3 as 20900-15494;\n",
+    "# confirm that (20000,16000) is the intended corner for SMTBx3_scene004.\n",
+    "d_crop = {'SMTBx2-5_scene001': (2000,9000),\n",
+    "    'SMTBx3_scene004': (20000,16000),\n",
+    "    'HTA-33_scene002': (3271, 607),\n",
+    "    'SMTBx1-16_scene003': (2440,220),\n",
+    "        }\n",
+    "i_crop = 2500  # side length of the region, in the same units as DAPI_X / DAPI_Y\n",
+    "# Collect the pieces and concatenate once: DataFrame.append is deprecated (removed in pandas 2.0)\n",
+    "# and re-copies the accumulated frame on every iteration.\n",
+    "ls_result = []\n",
+    "for s_tissue, tu_crop in d_crop.items():\n",
+    "    # cells of this scene are found by the scene name appearing in the cell index\n",
+    "    df_scene = df_data.loc[df_data.index.str.contains(s_tissue, regex=False)]\n",
+    "    b_x = (df_scene.DAPI_X > tu_crop[0]) & (df_scene.DAPI_X < tu_crop[0]+i_crop)\n",
+    "    b_y = (df_scene.DAPI_Y > tu_crop[1]) & (df_scene.DAPI_Y < tu_crop[1]+i_crop)\n",
+    "    ls_index = df_scene.loc[b_x & b_y].index\n",
+    "    ls_result.append(df_data.loc[ls_index])\n",
+    "df_result = pd.concat(ls_result) if ls_result else pd.DataFrame()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#by tissue\n",
+    "# Composition plots for the cropped regions in df_result: a clustermap (PDF) and a\n",
+    "# relabelled stacked bar chart (PNG) are saved under ./GatingPlots for every categorical column.\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "s_grouper='slide_scene'\n",
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "mpl.rcParams['ps.fonttype'] = 42\n",
+    "# slide name -> biopsy label used on the bar plot\n",
+    "d_rename = {'HTA-33':'Bx4', 'SMTBx1-16':'Bx1', 'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3'}\n",
+    "# savefig does not create missing folders\n",
+    "os.makedirs('./GatingPlots', exist_ok=True)\n",
+    "# `cm` is not imported by this notebook's import cell; get tab10 through pyplot instead\n",
+    "tu_tab10 = plt.get_cmap('tab10').colors\n",
+    "\n",
+    "#calculate proportions\n",
+    "for s_cell in df_data.columns[(df_data.dtypes=='object') & ~(df_data.columns.isin([s_grouper]))].tolist():\n",
+    "    df_prop = viz.prop_positive(df_result,s_cell=s_cell,s_grouper=s_grouper)\n",
+    "    #df_prop.to_csv(f'ManualGating_SMT101_proportions_{s_cell}.csv')\n",
+    "    # make annotations: one colour per row of df_prop, cycling when there are more rows than colours\n",
+    "    df_annot=pd.DataFrame(data={'ID': df_prop.index.tolist()},index=df_prop.index)\n",
+    "    lut = {s_id: tu_tab10[i_id % len(tu_tab10)] for i_id, s_id in enumerate(sorted(df_annot.ID.unique()))}\n",
+    "    g, df_plot_less = viz.prop_clustermap(df_prop,df_annot,i_thresh =.001,lut=lut)\n",
+    "    g.savefig(f'./GatingPlots/{s_cell}_clustermap_tissue3.pdf',dpi=150)\n",
+    "    plt.close()\n",
+    "    # pick a palette by the number of columns left to plot\n",
+    "    if df_plot_less.shape[1] < 8:\n",
+    "        cmap = \"Spectral\"\n",
+    "    elif df_plot_less.shape[1] < 11:\n",
+    "        cmap = \"Paired\"\n",
+    "    else:\n",
+    "        cmap = \"tab20\"\n",
+    "    fig = viz.prop_barplot(df_plot_less.rename(d_rename),s_cell,colormap=cmap)\n",
+    "    fig.set_size_inches(4.5, 2.3)\n",
+    "    ax_list = fig.axes\n",
+    "    ax_list[0].set_ylabel('')\n",
+    "    ax_list[0].set_xlabel('Fraction of Cells')\n",
+    "    ax_list[0].set_title('')\n",
+    "    fig.suptitle('Gating Composition: Biopsies',x=0.5,y=0.9,fontsize=14)\n",
+    "    # lay out this figure explicitly rather than whatever pyplot considers current\n",
+    "    fig.tight_layout()\n",
+    "    fig.savefig(f'./GatingPlots/{s_cell}_bar_tissue3.png',dpi=200)\n",
+    "    #fig.savefig(f'./{s_date}/{s_cell}_bar_tissue3.pdf',dpi=200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s_date"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python3.9.5",
+   "language": "python",
+   "name": "python3.9.5"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Normalize_Bx2-4.ipynb b/Normalize_Bx2-4.ipynb
new file mode 100755
index 0000000..45a5f00
--- /dev/null
+++ b/Normalize_Bx2-4.ipynb
@@ -0,0 +1,1198 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load libraries\n",
+    "\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import copy\n",
+    "import seaborn as sns\n",
+    "import importlib\n",
+    "from scipy.signal import argrelmax, find_peaks, peak_widths\n",
+    "import scanpy as sc\n",
+    "from sklearn.cluster import KMeans\n",
+    "from sklearn.preprocessing import scale, minmax_scale\n",
+    "from sklearn.metrics import silhouette_score\n",
+    "import matplotlib as mpl\n",
+    "mpl.rc('figure', max_open_warning = 0)\n",
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "mpl.rcParams['ps.fonttype'] = 42\n",
+    "mpl.rcParams['mathtext.it'] = 'Arial:italic'\n",
+    "mpl.rcParams['mathtext.rm'] = 'Arial'\n",
+    "codedir = os.getcwd()\n",
+    "#load cmif libraries\n",
+    "#os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF')\n",
+    "from mplex_image import visualize as viz, process, preprocess, normalize"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.chdir(codedir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(222)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Table of contents <a name=\"contents\"></a>\n",
+    "1. [Load Data](#load)\n",
+    "2. [Normalize](#norm)\n",
+    "3. [Visualize Normalization](#normviz)\n",
+    "4. [leiden for cell typing](#clusterlei)\n",
+    "5. [Leiden cluster](#clust1)\n",
+    "\n",
+    "\n",
+    "note:\n",
+    "\n",
+    "    Could you make the composite fraction bar graph only in the following regions?\n",
+    "\n",
+    "    Bx2: SMTBx2-5-Scene-001_ROI1-2000-9000-2500-2500\n",
+    "    Bx3: SMTBx3-Scene-004_ROI2-20900-15494-2500-2500\n",
+    "    Bx4: HTA-33-Scene-002_ROI1-3271-607-2500-2500\n",
+    "\n",
+    "    If possible, also include Bx1:\n",
+    "    Bx1: SMTBx1-Scene-003_ROI1-2440-220-2500-2500\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load data\n",
+    "os.chdir(f'{codedir}/paper_data')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s_date = '20210402'\n",
+    "if not os.path.exists(s_date):\n",
+    "    os.mkdir(s_date)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load Data <a name=\"load\"></a>\n",
+    "\n",
+    "[contents](#contents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Discover the per-sample feature files in paper_data and load them into three frames:\n",
+    "# df_mi (filtered mean intensity), df_xy (centroids) and df_edge (edge cells, when the file exists).\n",
+    "os.chdir(f'{codedir}/paper_data')\n",
+    "df_file = pd.DataFrame(index=os.listdir())\n",
+    "df_file = df_file[df_file.index.str.contains('FilteredMeanIntensity_DAPI')]\n",
+    "# file names are parsed positionally: second '_' field is the sample, text after 'y_DAPI' is the DAPI suffix\n",
+    "df_file['tissue'] = [item.split('_')[1] for item in df_file.index]\n",
+    "df_file['dapi'] = ['DAPI' + item.split('y_DAPI')[1].split('.')[0] for item in df_file.index]\n",
+    "ls_sample = df_file.tissue.tolist()\n",
+    "d_dapi = dict(zip(df_file.tissue.tolist(),df_file.dapi.tolist()))\n",
+    "# JE-TMA-60 has several FilteredMeanIntensity files; pin the DAPI10 one here\n",
+    "d_dapi.update({'JE-TMA-60': 'DAPI10_DAPI2'})\n",
+    "\n",
+    "# Collect per-sample frames and concatenate once: DataFrame.append is deprecated\n",
+    "# (removed in pandas 2.0) and re-copies the accumulated frame on every iteration.\n",
+    "ls_mi = []\n",
+    "ls_xy = []\n",
+    "ls_edge = []\n",
+    "for s_sample in sorted(set(ls_sample)):\n",
+    "    #if not s_sample.find('HTA')>-1:\n",
+    "    print(f'loading {s_sample}')\n",
+    "    ls_mi.append(pd.read_csv(f'{codedir}/paper_data/features_{s_sample}_FilteredMeanIntensity_{d_dapi[s_sample]}.csv', index_col=0))\n",
+    "    ls_xy.append(pd.read_csv(f'{codedir}/paper_data/features_{s_sample}_CentroidXY.csv',index_col=0))\n",
+    "    s_edge = f'{codedir}/paper_data/features_{s_sample}_EdgeCells153pixels_CentroidXY.csv'\n",
+    "    if os.path.exists(s_edge):\n",
+    "        ls_edge.append(pd.read_csv(s_edge,index_col=0))\n",
+    "# an empty frame is kept when nothing was loaded, matching the previous pd.DataFrame() start value\n",
+    "df_mi = pd.concat(ls_mi) if ls_mi else pd.DataFrame()\n",
+    "df_xy = pd.concat(ls_xy) if ls_xy else pd.DataFrame()\n",
+    "df_edge = pd.concat(ls_edge) if ls_edge else pd.DataFrame()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#sorted(df_mi.columns[df_mi[~df_mi.index.str.contains('JE-TMA-60')].isna().sum() != 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_marker = ['AR_nuclei', 'CD20_perinuc5', 'CD31_perinuc5', 'CD3_perinuc5',  'CD44_perinuc5', 'CD45_perinuc5',#'CD44_nucadj2',\n",
+    " 'CD4_perinuc5', 'CD68_perinuc5','CD8_perinuc5', 'CK14_cytoplasm', 'CK17_cytoplasm', 'CK19_cytoplasm', 'CK5_cytoplasm',\n",
+    " 'CK7_cytoplasm', 'CK8_cytoplasm', 'ColI_perinuc5', 'ColIV_perinuc5','CoxIV_perinuc5','EGFR_cytoplasm', 'ER_nuclei',\n",
+    " 'Ecad_cytoplasm', 'FoxP3_nuclei', 'GRNZB_nuclei', 'H3K27_nuclei','H3K4_nuclei', 'HER2_cellmem25','Ki67_nuclei',\n",
+    " 'LamAC_nuclei', 'PCNA_nuclei', 'PD1_perinuc5', 'PDPN_perinuc5','DAPI2_nuclei',  # 'ER_nuclei25','HER2_cytoplasm','PgR_nuclei','Vim_nucadj2'\n",
+    " 'Vim_perinuc5', 'aSMA_perinuc5', 'pHH3_nuclei', 'pRB_nuclei', 'pS6RP_perinuc5','slide_scene',\n",
+    "           ] # CD8R bad, 'gH2AX_nuclei' in R11 Bx3 not included\n",
+    "\n",
+    "df_mi = df_mi.loc[:,ls_marker]\n",
+    " \n",
+    "# old \n",
+    "#df_mi = df_mi.loc[:,['HER2_cellmem25', 'DAPI2_nuclei',# 'CD44_nucadj2', 'Vim_nucadj2','ER_nuclei25','HER2_cytoplasm',\n",
+    "#       'CD20_perinuc5', 'CD3_perinuc5', 'CD31_perinuc5', 'CD4_perinuc5','CD44_perinuc5', 'CD45_perinuc5', 'CD68_perinuc5', 'CD8_perinuc5',\n",
+    "#       'PD1_perinuc5', 'PDPN_perinuc5', 'Vim_perinuc5', 'aSMA_perinuc5','CK14_cytoplasm', 'CK17_cytoplasm', 'CK19_cytoplasm', 'CK5_cytoplasm',\n",
+    "#       'CK7_cytoplasm', 'Ecad_cytoplasm', 'ER_nuclei', 'Ki67_nuclei', 'LamAC_nuclei','PCNA_nuclei', 'pHH3_nuclei', 'slide_scene']]\n",
+    "\n",
+    "\n",
+    "df_mi['batch'] = [item.split('_')[0] for item in df_mi.index]\n",
+    "#df_mi['scene'] = [item.split('_')[1] for item in df_mi.index]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Deal with JE-TMA-60"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# markers in JE-TMA-60\n",
+    "#'JE-TMA-60_scene06', 'JE-TMA-60_scene08', 'JE-TMA-60_scene09', 'JE-TMA-60_scene10', 'JE-TMA-60_scene11', 'JE-TMA-60_scene13'\n",
+    "# R5 is CK17.PDPN.CD45.FoxP3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_R5 = pd.read_csv(f'{codedir}/paper_data/features_JE-TMA-60_FilteredMeanIntensity_DAPI5_DAPI2.csv',index_col=0)\n",
+    "df_R4 = pd.read_csv(f'{codedir}/paper_data/features_JE-TMA-60_FilteredMeanIntensity_DAPI4_DAPI2.csv',index_col=0)\n",
+    "df_R10 = df_mi[df_mi.batch=='JE-TMA-60']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_scene = set(df_R10.slide_scene)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_na = set([item.split('_cell')[0] for item in df_R5.index]) - set([item.split('_cell')[0] for item in df_R10.index])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#select markers, scenes for normalization (based on JE-TMA-60 tissue loss)\n",
+    "# JE-TMA-60 rows are rebuilt from three files: the R4 file supplies the rows and the ls_R4 markers,\n",
+    "# the R5 file fills ls_R5, and the DAPI10 rows (df_R10) fill ls_pos for the scenes in ls_scene.\n",
+    "ls_pos = ['HER2_cellmem25','CK19_cytoplasm','CK7_cytoplasm','CK8_cytoplasm','Ecad_cytoplasm','ER_nuclei','Ki67_nuclei','LamAC_nuclei',\n",
+    "          'PCNA_nuclei','pHH3_nuclei','Vim_perinuc5','DAPI2_nuclei','H3K27_nuclei','H3K4_nuclei', 'pRB_nuclei','pS6RP_perinuc5',\n",
+    "         'CoxIV_perinuc5','EGFR_cytoplasm']\n",
+    "ls_R5 = ['CK17_cytoplasm','PDPN_perinuc5','CD45_perinuc5','FoxP3_nuclei'] #\n",
+    "ls_R4 = ['pHH3_nuclei','CK14_cytoplasm','Ki67_nuclei','CK19_cytoplasm','CK5_cytoplasm','HER2_cellmem25',\n",
+    "        'Ecad_cytoplasm', 'ER_nuclei','CD44_perinuc5', 'PCNA_nuclei','aSMA_perinuc5','CD3_perinuc5','EGFR_cytoplasm']\n",
+    "ls_bad = ['CD20_perinuc5', 'CD31_perinuc5', 'CD4_perinuc5', 'CD68_perinuc5', 'CD8_perinuc5','PD1_perinuc5',\n",
+    "         'ColI_perinuc5', 'ColIV_perinuc5'] #'CK7_cytoplasm', #'LamAC_nuclei',\n",
+    "#ls_good = ['CK7_cytoplasm','Vim_perinuc5','LamAC_nuclei']\n",
+    "\n",
+    "#R4\n",
+    "df =  df_mi[df_mi.batch!='JE-TMA-60']\n",
+    "# pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0\n",
+    "df = pd.concat([df, df_R4.loc[:,ls_R4]])\n",
+    "#R5\n",
+    "ls_index = df_R5.loc[df_R5.index.isin(df_R4.index)].index\n",
+    "df.loc[ls_index,ls_R5] = df_R5.loc[ls_index,ls_R5]\n",
+    "\n",
+    "#fill R6-8\n",
+    "ls_index = df_mi.loc[(df_mi.slide_scene.isin(ls_scene)) & (df_mi.index.isin(df_R4.index))].index\n",
+    "df.loc[ls_index,ls_pos] = df_R10.loc[ls_index,ls_pos]\n",
+    "\n",
+    "# the rows added from df_R4 carry no batch / slide_scene, so both are re-derived from the cell index\n",
+    "df['batch'] = [item.split('_')[0] for item in df.index]\n",
+    "#df['scene'] = [item.split('_')[1] for item in df.index]\n",
+    "df['slide_scene'] = [item.split('_cell')[0] for item in df.index]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## filter edge cells"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#filter out unwanted cells\n",
+    "# Each entry maps a slide_scene to a coordinate condition; cells of that scene meeting the\n",
+    "# condition are removed. Edge cells listed in df_edge are removed as well.\n",
+    "d_filter = {#41 (not used)\n",
+    "            'JE-TMA-41_scene01':(df_xy.DAPI_Y > 5000),'JE-TMA-41_scene03':(df_xy.DAPI_Y > 5000),\n",
+    "            'JE-TMA-41_scene04':(df_xy.DAPI_Y < 1500),'JE-TMA-41_scene05':(df_xy.DAPI_Y > 5000),\n",
+    "            'JE-TMA-41_scene06':(df_xy.DAPI_Y < 1500),'JE-TMA-41_scene08':(df_xy.DAPI_Y < 1500),\n",
+    "            'JE-TMA-41_scene09':(df_xy.DAPI_Y > 5000),'JE-TMA-41_scene11':(df_xy.DAPI_Y < 1500),\n",
+    "            #43\n",
+    "            'JE-TMA-43_scene09':(df_xy.DAPI_Y < 1200),'JE-TMA-43_scene14':(df_xy.DAPI_Y < 1200),\n",
+    "            #60\n",
+    "            'JE-TMA-60_scene02':(df_xy.DAPI_X < 1500),'JE-TMA-60_scene05':(df_xy.DAPI_X < 1500),\n",
+    "            'JE-TMA-60_scene11':(df_xy.DAPI_Y < 1500),'JE-TMA-60_scene14':(df_xy.DAPI_X < 1500),\n",
+    "            'JE-TMA-60_scene06':(df_xy.DAPI_Y < 1500),'JE-TMA-60_scene08':(df_xy.DAPI_Y > 5000),\n",
+    "            'JE-TMA-60_scene10':(df_xy.DAPI_Y < 1500),\n",
+    "            #62\n",
+    "            'JE-TMA-62_scene01':(df_xy.DAPI_Y > 5000),\n",
+    "            'JE-TMA-62_scene02':(df_xy.DAPI_X > 5000),'JE-TMA-62_scene03':(df_xy.DAPI_X < 1000),\n",
+    "            'JE-TMA-62_scene04':(df_xy.DAPI_Y < 1500),'JE-TMA-62_scene06':(df_xy.DAPI_X < 1000),\n",
+    "            'JE-TMA-62_scene08':(df_xy.DAPI_Y > 5000),'JE-TMA-62_scene10':(df_xy.DAPI_Y < 1500),\n",
+    "            #'SMTBx1-16_scene001':(df_xy.DAPI_Y > 1), #keep scene 1 for manual thresholding\n",
+    "            'SMTBx2-3_scene002':(df_xy.DAPI_Y > 5000),'SMTBx3_scene004':(df_xy.DAPI_X <11000),\n",
+    "            'SMTBx3_scene005':(df_xy.DAPI_X > 0),'SMTBx4-3_scene001':(df_xy.DAPI_Y < 2400),\n",
+    "            'SMTBx2-5_scene002':(df_xy.DAPI_Y > 5000),'HTA-33_scene003':(df_xy.DAPI_Y > 9000)}\n",
+    "# second condition for a scene that already has an entry above (a dict holds one value per key)\n",
+    "d_filter2 = {'JE-TMA-60_scene02':(df_xy.DAPI_Y > 4500)}\n",
+    "ls_filter_all = []\n",
+    "for d_condition in (d_filter, d_filter2):\n",
+    "    for s_scene, filtercon in d_condition.items():\n",
+    "        ls_filter = df_xy[(df_xy.slide_scene==s_scene) & filtercon].index.tolist()\n",
+    "        ls_filter_all.extend(ls_filter)\n",
+    "#filter edge\n",
+    "ls_filter_all.extend(df_edge.index.tolist())\n",
+    "# .copy() gives an independent frame: later cells assign columns into df_filter_mi with .loc,\n",
+    "# which on a boolean-indexed slice triggers SettingWithCopyWarning\n",
+    "df_filter_mi = df[(~df.index.isin(ls_filter_all))].copy()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_cluster = df_filter_mi.loc[:,['HER2_cellmem25','slide_scene']]\n",
+    "df_cluster['cluster'] = 1\n",
+    "df_cluster.drop('HER2_cellmem25',axis=1,inplace=True)\n",
+    "import importlib\n",
+    "importlib.reload(viz)\n",
+    "%matplotlib inline\n",
+    "viz.plot_clusters(df_cluster,df_xy,s_num='few')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#match controls to biopsies\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "d_replace = {'BC44290-146': 'JE-TMA-41',\n",
+    " 'SMTBx2-3': 'JE-TMA-41',\n",
+    " 'SMTBx2-5':'JE-TMA-43',\n",
+    " 'SMTBx3':'JE-TMA-60',\n",
+    " 'SMTBx4-3':'JE-TMA-62'}\n",
+    "df_filter_mi.loc[:,'batch'] = df_filter_mi.batch.replace(d_replace)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#standardize the scenes\n",
+    "d_replace = { 'JE-TMA-41_scene13':'JE-TMA-41_scene14',\n",
+    "             'JE-TMA-41_scene12':'JE-TMA-41_scene13',\n",
+    "             'JE-TMA-62_scene13':'JE-TMA-62_scene14',\n",
+    "             'JE-TMA-62_scene12':'JE-TMA-62_scene13'}\n",
+    "df_filter_mi.loc[:,'scene'] = df_filter_mi.slide_scene.replace(d_replace)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "df_filter_mi.merge(df_xy.loc[:,['DAPI_X', 'DAPI_Y', 'nuclei_area', 'nuclei_eccentricity']],left_index=True,right_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_out = df_filter_mi.merge(df_xy.loc[:,['DAPI_X', 'DAPI_Y', 'nuclei_area', 'nuclei_eccentricity']],left_index=True,right_index=True)\n",
+    "len(df_out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#2-23 contains NAs\n",
+    "#2-22 the NAs were filled with random gaussian data\n",
+    "# 0302 include scene 1 Bx1\n",
+    "# 0318 just Bx2 - 4, (Bx2-5)\n",
+    "# 20210324 has HTA9-1-33\n",
+    "if not os.path.exists('20210324_SMTBx1-4_JE-TMA-43_60_62_FilteredMeanIntensity.csv'):\n",
+    "    print('saving csv')\n",
+    "    #df_out.to_csv('20210223_SMTBx1-4_JE-TMA-41_60_62_BC44290-146.csv')\n",
+    "    df_out.to_csv('20210324_SMTBx1-4_JE-TMA-43_60_62_FilteredMeanIntensity.csv') "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#2-23 contains NAs\n",
+    "#2-22 the NAs were filled with random gaussian data\n",
+    "# 0302 include scene 1 Bx1\n",
+    "# 0318 just Bx2 - 4, (Bx2-5)\n",
+    "if not os.path.exists('20210320_SMTBx2-4_JE-TMA-43_60_62_FilteredMeanIntensity.csv'):\n",
+    "    print('saving csv')\n",
+    "    #df_out.to_csv('20210223_SMTBx1-4_JE-TMA-41_60_62_BC44290-146.csv')\n",
+    "    df_out.to_csv('20210320_SMTBx2-4_JE-TMA-43_60_62_FilteredMeanIntensity.csv') "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Normalization <a name=\"norm\"></a>\n",
+    "\n",
+    "use ComBat.\n",
+    "\n",
+    "[contents](#contents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_mi = pd.read_csv('20210320_SMTBx2-4_JE-TMA-43_60_62_FilteredMeanIntensity.csv',index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_mi.scene.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_pos = ['HER2_cellmem25','CK19_cytoplasm','CK7_cytoplasm','CK8_cytoplasm','Ecad_cytoplasm','ER_nuclei','Ki67_nuclei','LamAC_nuclei',\n",
+    "          'PCNA_nuclei','pHH3_nuclei','Vim_perinuc5','DAPI2_nuclei','H3K27_nuclei','H3K4_nuclei', 'pRB_nuclei','pS6RP_perinuc5',\n",
+    "         'CoxIV_perinuc5','EGFR_cytoplasm']\n",
+    "ls_R5 = ['CK17_cytoplasm','PDPN_perinuc5','CD45_perinuc5','FoxP3_nuclei'] #\n",
+    "ls_R4 = ['pHH3_nuclei','CK14_cytoplasm','Ki67_nuclei','CK19_cytoplasm','CK5_cytoplasm','HER2_cellmem25',\n",
+    "        'Ecad_cytoplasm', 'ER_nuclei','CD44_perinuc5', 'PCNA_nuclei','aSMA_perinuc5','CD3_perinuc5','EGFR_cytoplasm']\n",
+    "ls_bad = ['CD20_perinuc5', 'CD31_perinuc5', 'CD4_perinuc5', 'CD68_perinuc5', 'CD8_perinuc5','PD1_perinuc5',\n",
+    "         'ColI_perinuc5', 'ColIV_perinuc5']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#select normalization scenes\n",
+    "ls_R10_scene = ['scene06', 'scene08', 'scene09', 'scene10', 'scene11', 'scene13']\n",
+    "ls_R10 = ['HER2_cellmem25', 'CK19_cytoplasm', 'CK7_cytoplasm', 'Ecad_cytoplasm', 'ER_nuclei', 'Ki67_nuclei', 'LamAC_nuclei',\n",
+    "          'PCNA_nuclei','pHH3_nuclei', 'Vim_perinuc5','CD44_perinuc5','DAPI2_nuclei', #adding following:\n",
+    "          'CK8_cytoplasm','CoxIV_perinuc5', 'EGFR_cytoplasm', 'H3K27_nuclei', 'H3K4_nuclei', 'pRB_nuclei', 'pS6RP_perinuc5']\n",
+    "#note: CK17 may have quenching artifact; PDPN not good in Bx1, so just CD45 important\n",
+    "#'CK17_cytoplasm','PDPN_perinuc5', #'FoxP3_nuclei' not in full set\n",
+    "ls_R5 = ['PDPN_perinuc5','CD45_perinuc5','FoxP3_nuclei', 'aSMA_perinuc5','CD3_perinuc5']  # aSMA because N breast, scene 01 better than 07 for immune\n",
+    "ls_R5_scene = ['scene01','scene03','scene04']\n",
+    "#old ls_R4 = ['pHH3_nuclei','CK14_cytoplasm','Ki67_nuclei','CK19_cytoplasm','CK5_cytoplasm','HER2_cellmem25',\n",
+    "#        'Ecad_cytoplasm', 'ER_nuclei','CD44_perinuc5', 'PCNA_nuclei','aSMA_perinuc5','CD3_perinuc5','DAPI2_nuclei']\n",
+    "#can scene 7 be a good control for CD3, CK14 and CK5? yes. R1 doesn't add much\n",
+    "ls_R4 = [ 'CK14_cytoplasm', 'CK5_cytoplasm','CK17_cytoplasm'] #'CD3_perinuc5',\n",
+    "ls_R4_scene = ['scene02','scene07']\n",
+    "ls_bad = ['CD20_perinuc5', 'CD31_perinuc5', 'CD4_perinuc5', 'CD68_perinuc5', 'CD8_perinuc5','PD1_perinuc5']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "set(df_mi.batch)\n",
+    "#df_mi = df_mi.loc[df_mi.batch!='JE-TMA-60']\n",
+    "df_mi['slide_scene'] = df_mi.scene\n",
+    "df_mi['scene'] = [item.split('_')[1] for item in df_mi.slide_scene]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#dropped 60\n",
+    "df_norm_all=pd.DataFrame(index=df_mi.dropna().index)\n",
+    "\n",
+    "#not dropped 60\n",
+    "df_norm_all=pd.DataFrame(index=df_mi.index)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ComBat is fitted on control TMA cells, one marker group at a time, and each fit is applied\n",
+    "# to every batch except SMTBx1-16. Each group pairs its markers with the scenes used for fitting:\n",
+    "#   R4  - scenes that are good until R4, and R1-4 markers   (# + ls_R5_scene + ls_R10_scene)\n",
+    "#   R5  - scenes that are good until R5, and R5 markers\n",
+    "#   R10 - scenes that are good through round 10 and markers that are positive on those scenes \"pos\"\n",
+    "d_fit = {'R4': (ls_R4, ls_R4_scene),\n",
+    "         'R5': (ls_R5, ls_R5_scene),\n",
+    "         'R10': (ls_R10, ls_R10_scene)}\n",
+    "b_transform = df_mi.batch!='SMTBx1-16'\n",
+    "\n",
+    "for s_type, (ls_pos, ls_scene) in d_fit.items():\n",
+    "    #fit: control rows are JE-TMA cells from the chosen scenes with no missing value in this marker group\n",
+    "    b_tma = df_mi.index.str.contains('JE-TMA')\n",
+    "    b_scene = df_mi.scene.isin(ls_scene)\n",
+    "    b_complete = df_mi.loc[:,ls_pos].isna().sum(axis=1)==0\n",
+    "    b_control = (b_tma & b_scene & b_complete)\n",
+    "    data = df_mi.loc[b_control,ls_pos].T\n",
+    "    batch = df_mi.loc[b_control,'batch']\n",
+    "    gamma_star, delta_star, stand_mean, var_pooled = normalize.combat_fit(data, batch)\n",
+    "    #transform\n",
+    "    #data = df_mi.loc[df_mi.batch!='SMTBx1-16',df_mi.dtypes=='float64'].drop(['DAPI_X','DAPI_Y'],axis=1).T\n",
+    "    data = df_mi.loc[b_transform,ls_pos].T\n",
+    "    batch = df_mi.loc[b_transform,'batch']\n",
+    "    bayesdata = normalize.combat_transform(data,batch,gamma_star,delta_star,stand_mean, var_pooled)\n",
+    "    df_norm = bayesdata.T\n",
+    "    # left merge keeps every row of df_norm_all; rows that were not transformed stay NaN\n",
+    "    df_norm_all = df_norm_all.merge(df_norm,left_index=True,right_index=True,how='left')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_norm_all.tail()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# run after #1, 2 and 3\n",
+    "df_norm_all =  df_norm_all.merge(df_mi.loc[:,['batch','DAPI_X','DAPI_Y','scene','nuclei_area','nuclei_eccentricity']],left_index=True,right_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#old check\n",
+    "df_norm = df_norm.merge(df_mi.loc[:,['batch','DAPI_X','DAPI_Y','scene','nuclei_area','nuclei_eccentricity']],left_index=True,right_index=True)\n",
+    "#df_mi.loc[b_control,:].drop(['DAPI_X','DAPI_Y'],axis=1).groupby('batch').mean()\n",
+    "#df_mi[df_mi.index.str.contains('JE-TMA')].drop(['DAPI_X','DAPI_Y'],axis=1).groupby('batch').std()\n",
+    "#check\n",
+    "df_norm.loc[b_control,:].drop(['DAPI_X','DAPI_Y'],axis=1).groupby('batch').mean()\n",
+    "#df_norm[df_norm.index.str.contains('JE-TMA')].drop(['DAPI_X','DAPI_Y'],axis=1).groupby('batch').std()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The Umap section below reads this file back, so write it when it does not exist yet\n",
+    "# (same save-once guard as the FilteredMeanIntensity csv earlier in this notebook).\n",
+    "#df_norm_all.to_csv('20210320_SMTBx2-4_JE-TMA-43_60_62_normalized.csv')\n",
+    "s_norm_csv = '20210325_SMTBx2-4_JE-TMA-43_60_62_normalized.csv'\n",
+    "if not os.path.exists(s_norm_csv):\n",
+    "    print('saving csv')\n",
+    "    df_norm_all.to_csv(s_norm_csv)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Umap Visualize Normalization <a name=\"normviz\"></a>\n",
+    "\n",
+    "[contents](#contents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the normalized table back, shorten the column names and derive slide / scene labels from the cell index.\n",
+    "#s_sample = '20210320_SMTBx2-4_JE-TMA-43_60_62'\n",
+    "s_sample = '20210325_SMTBx2-4_JE-TMA-43_60_62'\n",
+    "# these names are shortened first so the '_' split below does not cut them to 'nuclei' / 'DAPI'\n",
+    "d_short = {'nuclei_area':'area','nuclei_eccentricity':'eccentricity',\n",
+    "           'DAPI_X':'DAPIX','DAPI_Y':'DAPIY'}\n",
+    "df_norm_all = pd.read_csv(f'{s_sample}_normalized.csv',index_col=0).rename(columns=d_short)\n",
+    "# keep only the text before the first '_' of every column name\n",
+    "df_norm_all.columns = [s_col.partition('_')[0] for s_col in df_norm_all.columns]\n",
+    "# split each cell index once; field 0 is used as the slide and field 1 as the scene\n",
+    "ls_field = [s_cell.split('_') for s_cell in df_norm_all.index]\n",
+    "df_norm_all['slide'] = [ls_item[0] for ls_item in ls_field]\n",
+    "df_norm_all['scene'] = [ls_item[1] for ls_item in ls_field]\n",
+    "df_norm_all['slide_scene'] = [s_cell.split('_cell')[0] for s_cell in df_norm_all.index]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_norm_all = df_norm_all.loc[~df_norm_all.slide_scene.isin(['JE-TMA-43_scene01','JE-TMA-62_scene01'])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# visualize\n",
+    "# Build an AnnData object from the complete (NaN-free) rows and run PCA on it.\n",
+    "%matplotlib inline\n",
+    "s_type = 'w-60_no01'\n",
+    "#adata = sc.AnnData(df_norm_all.loc[:,df_norm_all.dtypes=='float64'].drop(['DAPIX','DAPIY'],axis=1)) \n",
+    "ls_drop = ['DAPIX','DAPIY','DAPI2','LamAC','pHH3','FoxP3','CoxIV',\n",
+    "          'H3K27','H3K4','pRB','pS6RP','aSMA','PDPN'] #aSMA, PDPN not well norm\n",
+    "# drop incomplete rows once and reuse the result for the matrix and the annotations\n",
+    "df_complete = df_norm_all.dropna()\n",
+    "b_float = df_norm_all.dtypes=='float64'\n",
+    "adata = sc.AnnData(df_complete.loc[:,b_float].drop(columns=ls_drop))\n",
+    "adata.obs['batch'] = df_complete.loc[:,'batch']\n",
+    "# scene001 - scene005 are all relabelled 'Bx'\n",
+    "d_bx = dict.fromkeys(['scene001','scene002','scene003','scene004','scene005'],'Bx')\n",
+    "adata.obs['scene'] = df_complete.loc[:,'scene'].replace(d_bx)\n",
+    "adata.obs['tissue'] = df_complete.loc[:,'slide']\n",
+    "# reduce dimensionality (PCA)\n",
+    "adata.raw = adata\n",
+    "sc.tl.pca(adata, svd_solver='auto')\n",
+    "#sc.pl.pca(adata)\n",
+    "sc.pl.pca_variance_ratio(adata, log=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calculate neighbors\n",
+    "n_neighbors = 31\n",
+    "n_pcs=len(adata.var.index) - 1\n",
+    "results_file =  f'{s_sample}_{n_neighbors}neighbors_{n_pcs}pcs_{len(adata.var.index)}markers.h5ad'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d_celline = {'scene02':'HCC1143',\n",
+    " 'scene03':'HCC3153',\n",
+    " 'scene04':'N.Breast',\n",
+    " 'scene05':'T47D',\n",
+    " 'scene06':'T47D',\n",
+    " 'scene07':'Tonsil',\n",
+    " 'scene08':'BT474',\n",
+    " 'scene09':'BT474',\n",
+    " 'scene10':'AU565',\n",
+    " 'scene11':'AU565',\n",
+    " 'scene12':'MDAMB436',\n",
+    " 'scene13':'MDAMB436',\n",
+    " 'scene14':'MDAMB436'}\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "# calculate neighbors\n",
+    "if os.path.exists(results_file):\n",
+    "    adata = sc.read_h5ad(results_file)\n",
+    "    print('loading umap')\n",
+    "else:\n",
+    "    # calculate neighbors \n",
+    "    print('calculating umap')\n",
+    "    sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs)\n",
+    "    sc.tl.umap(adata)\n",
+    "    #save results\n",
+    "    if not os.path.exists(results_file):\n",
+    "        adata.write(results_file)\n",
+    "\n",
+    "# umap plus scenes\n",
+    "fig,ax = plt.subplots(figsize=(3,2.5),dpi=600)\n",
+    "figname = f'UmapScene_{s_type}_{n_pcs+1}markers.png'\n",
+    "sc.pl.umap(adata, color='scene',save=figname,title=f'TMA Core',ax=ax)\n",
+    "\n",
+    "\n",
+    "# umap plus tissue\n",
+    "fig,ax = plt.subplots(figsize=(3,2.5),dpi=600)\n",
+    "figname = f'UmapTissue_{s_type}_{n_pcs+1}markers.png'\n",
+    "adata.obs['Tissue'] = adata.obs['tissue'].replace({'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3','SMTBx4-3':'Bx4'})\n",
+    "sc.pl.umap(adata, color='Tissue',save=figname,title=f'Tissue',ax=ax)\n",
+    "\n",
+    "\n",
+    "# umap plus cell line\n",
+    "adata.obs['Subtype'] = adata.obs.scene.replace(d_celline)\n",
+    "fig,ax = plt.subplots(figsize=(3,2.5),dpi=600)\n",
+    "figname = f'UmapSubtype_{s_type}_{n_pcs+1}markers.png'\n",
+    "sc.pl.umap(adata, color='Subtype',save=figname,title=f'Subtype',ax=ax)\n",
+    "\n",
+    "\n",
+    "#umap plot\n",
+    "ls_marker = adata.var.index.tolist()\n",
+    "figname = f\"Umap_{s_type}_{n_pcs+1}markers.png\"\n",
+    "axes = sc.pl.umap(adata, color=ls_marker,wspace=.25,save=figname,vmin='p1.5',vmax='p98.5',ncols=3,show=False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "#umap plot\n",
+    "ls_marker = adata.var.index.tolist()\n",
+    "figname = f\"Umap_{s_type}_{n_pcs+1}markers.png\"\n",
+    "fig = sc.pl.umap(adata, color=ls_marker,wspace=.25,vmin='p1.5',vmax='p98.5',ncols=3,show=False,return_fig=True)\n",
+    "ax_list = fig.axes\n",
+    "for ax in ax_list:\n",
+    "    ax.set_title(ax.get_title(),fontsize=28)\n",
+    "fig.savefig(f'figures/{figname}',dpi=600)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## cluster leiden <a name=\"clusterlei\"></a>\n",
+    "\n",
+    "[contents](#contents)\n",
+    "\n",
+    "cluster on the markers that are normalized well"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "resolution = 0.45\n",
+    "results_file = f'{s_sample}_{n_neighbors}neighbors_{n_pcs}pcs_{len(adata.var.index)}markers_leiden{resolution}.h5ad'\n",
+    "#save\n",
+    "if not os.path.exists(results_file):\n",
+    "    sc.tl.leiden(adata,resolution=resolution)\n",
+    "else:\n",
+    "    adata = sc.read_h5ad(results_file)\n",
+    "    print('loading leiden')    \n",
+    "fig,ax = plt.subplots(figsize=(3,2.5),dpi=600)\n",
+    "figname=f'leiden_{resolution}.png'\n",
+    "sc.pl.umap(adata, color='leiden',ax=ax,save=figname)\n",
+    "#seaborn clustermap\n",
+    "df_p = pd.DataFrame(data=adata.raw.X,index=adata.obs.index,columns=adata.var.index)\n",
+    "df_p['leiden'] = adata.obs['leiden']\n",
+    "g = sns.clustermap(df_p.groupby('leiden').mean(),z_score=1,figsize=(4,4),cmap='viridis',\n",
+    "                   vmin=-1.5,vmax=1.5) \n",
+    "#g.savefig(f'./figures/clustermap_leiden.png',dpi=200)\n",
+    "marker_genes = df_p.groupby('leiden').mean().iloc[:,g.dendrogram_col.reordered_ind].columns.tolist()\n",
+    "categories_order = df_p.groupby('leiden').mean().iloc[g.dendrogram_row.reordered_ind,:].index.tolist()\n",
+    "#scanpy matrixplot\n",
+    "fig,ax = plt.subplots(figsize=(5,5), dpi=200)\n",
+    "figname=f'Matrixplot_leiden_{resolution}.png'\n",
+    "sc.pl.matrixplot(adata, var_names=marker_genes, groupby=f'leiden',title='',categories_order=categories_order,\n",
+    "                 ax=ax,save=figname,standard_scale='var',colorbar_title='Relative\\nintensity',\n",
+    "                #var_group_positions=[(3,23),(24,31),(32,42),(43,51)],\n",
+    "                 #var_group_labels=['tumor','T-cell','muscle\\n +AF','immune\\n+stroma'],\n",
+    "                #var_group_rotation=0\n",
+    "                )\n",
+    "\n",
+    "#save\n",
+    "if not os.path.exists(results_file):\n",
+    "    adata.write(results_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Leiden barplots <a name=\"clust1\"></a>\n",
+    "\n",
+    "\n",
+    "[contents](#contents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_order = [\n",
+    "       'Bx2','Bx3','Bx4',#'JE-TMA-43_scene01','JE-TMA-62_scene01',\n",
+    "       'JE-TMA-43_scene02', 'JE-TMA-62_scene02',\n",
+    "       'JE-TMA-43_scene03', 'JE-TMA-62_scene03', 'JE-TMA-43_scene04',\n",
+    "       'JE-TMA-62_scene04', 'JE-TMA-43_scene05', 'JE-TMA-62_scene05',\n",
+    "       'JE-TMA-43_scene06','JE-TMA-60_scene06', 'JE-TMA-62_scene06', 'JE-TMA-43_scene07',\n",
+    "       'JE-TMA-62_scene07', 'JE-TMA-43_scene08','JE-TMA-60_scene08', 'JE-TMA-62_scene08',\n",
+    "       'JE-TMA-43_scene09','JE-TMA-60_scene09', 'JE-TMA-62_scene09','JE-TMA-43_scene10', 'JE-TMA-62_scene10','JE-TMA-60_scene10',\n",
+    "       'JE-TMA-43_scene11', 'JE-TMA-60_scene11', 'JE-TMA-62_scene11', 'JE-TMA-43_scene13',\n",
+    "       'JE-TMA-62_scene12', 'JE-TMA-43_scene14','JE-TMA-60_scene13', 'JE-TMA-62_scene13']      "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_order_r = ls_order[::-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load original\n",
+    "'''\n",
+    "s_sample = '20210320_SMTBx2-4_JE-TMA-43_60_62'\n",
+    "n_neighbors = 30\n",
+    "n_pcs = 19\n",
+    "n_markers = n_pcs+1\n",
+    "resolution = 0.5\n",
+    "results_file = f'{s_sample}_{n_neighbors}neighbors_{n_pcs}pcs_{n_markers}markers_leiden{resolution}.h5ad'\n",
+    "adata1 = sc.read_h5ad(results_file) \n",
+    "\n",
+    "d_cluster = {'14': '14: Basal',\n",
+    "'5': '5: T cell',\n",
+    "'12': '12: T cell',\n",
+    "'10': '10: Myoepithelial',\n",
+    "'1': '1: Mesenchymal',\n",
+    "'16': '16: Prolif.',\n",
+    "'15': '15: Vim+ FB (Bx3)',\n",
+    "'11': '11: Vim+ FB (Bx4)',\n",
+    "'13': '13: Vim+ FB (Bx2)',\n",
+    "'7': '7: HER2++',\n",
+    "'9': '9: EGFR+ Basal',\n",
+    "'3': '3: HER2+',\n",
+    "'8': '8: HER2++, Ecad-',\n",
+    "'0': '0: ER+ (Bx4)',\n",
+    "'2': '2: ER+, PCNA+ ',\n",
+    "'4': '4: ER+, EGFR+ (Bx3)',\n",
+    "'6': '6: ER+ (Bx2)'}\n",
+    "d_clust_names = dict(zip([item[0] for item in d_cluster.items()],[item[1].split(': ')[1] for item in d_cluster.items()]))\n",
+    "'''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load\n",
+    "s_sample = '20210325_SMTBx2-4_JE-TMA-43_60_62'\n",
+    "n_neighbors = 31\n",
+    "n_pcs = 17\n",
+    "n_markers = n_pcs+1\n",
+    "resolution = 0.45\n",
+    "results_file = f'{s_sample}_{n_neighbors}neighbors_{n_pcs}pcs_{n_markers}markers_leiden{resolution}.h5ad'\n",
+    "adata1 = sc.read_h5ad(results_file) \n",
+    "print(results_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if resolution == 0.5:\n",
+    "    d_cluster = {'14': '14: Basal','12': '12: T cell','16': '16: Prolif.','7': '7: ER+ (Bx2)','13': '13: Luminal (N.Breast)',\n",
+    "    '1': '1: ER+ PCNA+ (T47D)','0': '0: ER+ (Bx4)','15': '15: ER+ CK8++ (Bx4)','4': '4: ER+, EGFR+ (Bx3)','18': '18: ER+, EGFR+ (Bx3)',\n",
+    "    '17': '17: (Bx3)','10': '10: FB (Bx4)','11': '11: FB (Bx2)','3': '3: CD44+','9': '9: CD44+', '8': '8: EGFR+ Basal',\n",
+    "    '5': '5: HER2++','6': '6: HER2+','2': '2: HER2++, Ecad-',}\n",
+    "elif resolution == 0.45:\n",
+    "    d_cluster = {'15':'15: Basal',\n",
+    "                 '12':'12: T cell',\n",
+    "                 '16': '16: prolif.',\n",
+    "                 '5':'5: ER+, EGFR+ (Bx3)',\n",
+    "                '0':'0: ER+ (Bx4)',\n",
+    "                '1':'1: ER+, PCNA+',\n",
+    "                '7':'7: ER- (Bx2)',\n",
+    "                '9':'9: ER+ (Bx2)',\n",
+    "                '8':'8: EGFR+ Basal',\n",
+    "                '4':'4: HER2+',\n",
+    "                '3':'3: HER2+',\n",
+    "                '6':'6: HER2+, Ecad-',\n",
+    "                '2':'2: Mesenchymal',\n",
+    "                '10':'10: Mesenchymal',\n",
+    "                '14':'14: fibroblast',\n",
+    "                '11':'11: fibroblast',\n",
+    "                '13':'13: fibroblast'}\n",
+    "d_clust_names = dict(zip([item[0] for item in d_cluster.items()],[item[1].split(': ')[1] for item in d_cluster.items()]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "mpl.rcParams['ps.fonttype'] = 42\n",
+    "#sns.set(font_scale=1.19)\n",
+    "#seaborn clustermap\n",
+    "df_p = pd.DataFrame(data=adata1.raw.X,index=adata1.obs.index,columns=adata1.var.index)\n",
+    "df_p['leiden'] = adata1.obs['leiden']\n",
+    "g = sns.clustermap(df_p.groupby('leiden').mean().rename({'eccentricity':'eccen.'},axis=1).rename(d_cluster, axis=0),\n",
+    "                   z_score=1,figsize=(6.2,6),cmap='viridis',\n",
+    "                   vmin=-2,vmax=2,cbar_pos=(.05, .89, .10, .05),cbar_kws={'orientation': 'horizontal','label':'Z-score'}) #(left, bottom, width, height),\n",
+    "g.savefig(f'./{s_date}/clustermap_leiden_{resolution}_{n_markers}.pdf',dpi=300)\n",
+    "g.savefig(f'./{s_date}/clustermap_leiden_{resolution}_{n_markers}.png',dpi=300)\n",
+    "marker_genes = df_p.groupby('leiden').mean().iloc[:,g.dendrogram_col.reordered_ind].columns.tolist()\n",
+    "categories_order = df_p.groupby('leiden').mean().iloc[g.dendrogram_row.reordered_ind,:].index.tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# stacked bar vertical\n",
+    "\n",
+    "df = pd.DataFrame(data=adata1.raw.X,index=adata1.obs.index,columns=adata1.var.index)\n",
+    "df[f'leiden'] = [int(item) for item in adata1.obs.leiden]\n",
+    "s_markers = n_markers\n",
+    "k=resolution\n",
+    "\n",
+    "df['slide'] = [item.split('_')[0] for item in df.index]\n",
+    "df['slide_scene'] = [item.split('_cell')[0] for item in df.index]\n",
+    "df['slide_scene'] = df.slide_scene.replace({'SMTBx2-5_scene001':'Bx2', 'SMTBx2-5_scene002':'Bx2',\n",
+    "       'SMTBx3_scene004':'Bx3', 'SMTBx4-3_scene001':'Bx4',\n",
+    "       'SMTBx4-3_scene002':'Bx4'})#.replace(d_order)\n",
+    "df['scene'] = [item.split('_')[1] for item in df.index]\n",
+    "df_prop = (df.groupby([f'leiden','slide_scene']).PCNA.count())/(df.groupby(['slide_scene']).PCNA.count())\n",
+    "df_prop = df_prop.unstack().fillna(value=0).T\n",
+    "\n",
+    "fig,ax=plt.subplots(figsize=(5,6), dpi=200)\n",
+    "df_prop['slide'] =[item.split('_')[0] for item in df_prop.index]\n",
+    "#df_prop['scene'] =[item.split('_')[1] for item in df_prop.index]\n",
+    "df_prop = df_prop.loc[ls_order_r]\n",
+    "df_prop.columns = [str(item) for item in df_prop.columns]\n",
+    "#df_prop.rename(d_order).rename(d_cluster,axis=1).plot(kind='barh',stacked=True,ax=ax,legend=True,cmap='tab20',width=0.9)\n",
+    "df_prop.plot(kind='barh',stacked=True,ax=ax,legend=True,cmap='tab20',width=0.9)\n",
+    "ax.legend(bbox_to_anchor=(1.05, 1.00),ncol=1, fancybox=True,title='Cluster ID')\n",
+    "ax.set_xlabel('Fraction of Cells')\n",
+    "ax.set_ylabel('Tissue')\n",
+    "ax.set_title('')\n",
+    "plt.tight_layout()\n",
+    "fig.savefig(f'./{s_date}/StackedBar_{s_markers}markers_{k}Clusters_vertical.pdf')\n",
+    "fig.savefig(f'./{s_date}/StackedBar_{s_markers}markers_{k}Clusters_vertical.png')\n",
+    "#plt.close()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#save the cluster ID, not the annotation\n",
+    "#df_prop.to_csv(f'{s_sample}_{n_markers}markers_leiden{resolution}_frac_pos.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.ticker as tic\n",
+    "#SMT biopsies: grouped bars of the fraction of cells per cluster, one bar per biopsy\n",
+    "fig,ax=plt.subplots(figsize=(2.8,3.2),dpi=200)\n",
+    "df_plot = df_prop.loc[['Bx2','Bx3','Bx4'],df_prop.dtypes=='float64'].T[::-1]\n",
+    "df_plot.plot(kind='barh',ax=ax,legend=True,width=.9)\n",
+    "ax.legend(title='Bx', loc='upper left',fancybox=True,borderpad=.2,bbox_to_anchor=(1.05, 1.05))\n",
+    "ax.set_xlabel('Fraction of Cells')\n",
+    "ax.set_ylabel('')\n",
+    "fig.suptitle(f'Cluster Composition: Biopsies',x=.5, y=.92)\n",
+    "for tick in ax.yaxis.get_major_ticks():\n",
+    "    tick.tick1line.set_markersize(0)\n",
+    "    tick.tick2line.set_markersize(0)\n",
+    "temp = tic.LinearLocator(numticks=18) #18 evenly spaced minor ticks = 17 intervals (presumably one per cluster row - confirm if the cluster count changes)\n",
+    "ax.yaxis.set_minor_locator(temp)\n",
+    "ax.grid(True, which='minor', axis='y') #positional flag: the b= keyword was renamed to visible= in matplotlib 3.5\n",
+    "plt.tight_layout()\n",
+    "fig.savefig(f'./{s_date}/Barplot_SMT{s_markers}_K{k}.pdf')\n",
+    "fig.savefig(f'./{s_date}/Barplot_SMT{s_markers}_K{k}.png')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls_order = ['Bx2', 'Bx3', 'Bx4','AU565-2','AU565-3', 'AU565-4', 'BT474-2','BT474-3', 'BT474-4', \n",
+    "       'HCC1143-2', 'HCC1143-4', 'HCC3153-2', 'HCC3153-4', #'JE-TMA-43_scene01','JE-TMA-62_scene01', 'JE-TMA-43_scene10',\n",
+    "        'MDAMB-436-2','MDAMB-436-3', 'MDAMB-436-4', 'T47D-2','T47D-3', 'T47D-4',\n",
+    "       'N.Breast-2', 'N.Breast-4', 'tonsil-2', 'tonsil-4']\n",
+    "d_order = {#'\n",
+    "       'JE-TMA-43_scene02':'HCC1143-2', 'JE-TMA-62_scene02':'HCC1143-4',\n",
+    "       'JE-TMA-43_scene03':'HCC3153-2', 'JE-TMA-62_scene03':'HCC3153-4', 'JE-TMA-43_scene04':'N.Breast-2',\n",
+    "       'JE-TMA-62_scene04':'N.Breast-4', 'JE-TMA-43_scene05':'T47D-2', 'JE-TMA-62_scene05':'T47D-4',\n",
+    "       'JE-TMA-43_scene06':'T47D-2', 'JE-TMA-62_scene06':'T47D-4', 'JE-TMA-43_scene07':'tonsil-2',\n",
+    "       'JE-TMA-62_scene07':'tonsil-4', 'JE-TMA-43_scene08':'BT474-2', 'JE-TMA-62_scene08':'BT474-4',\n",
+    "       'JE-TMA-43_scene09':'BT474-2', 'JE-TMA-62_scene09':'BT474-4',  'JE-TMA-43_scene10':'AU565-2','JE-TMA-62_scene10':'AU565-4',\n",
+    "       'JE-TMA-43_scene11':'AU565-2', 'JE-TMA-62_scene11':'AU565-4', 'JE-TMA-43_scene13':'MDAMB-436-2',\n",
+    "       'JE-TMA-62_scene12':'MDAMB-436-4', 'JE-TMA-43_scene14':'MDAMB-436-2', 'JE-TMA-62_scene13':'MDAMB-436-4',\n",
+    "       'JE-TMA-60_scene13':'MDAMB-436-3', 'JE-TMA-60_scene11':'AU565-3', 'JE-TMA-60_scene10':'AU565-3',\n",
+    "       'JE-TMA-60_scene09':'BT474-3', 'JE-TMA-60_scene08':'BT474-3', 'JE-TMA-60_scene06':'T47D-3'}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "#stacked bar vertical tissue\n",
+    "df['coreID'] = df.slide_scene.replace(d_order)\n",
+    "df['celltype'] = df.leiden.astype('str').replace(d_clust_names)\n",
+    "df_prop = (df.groupby([f'celltype','coreID']).PCNA.count())/(df.groupby(['coreID']).PCNA.count())\n",
+    "df_prop = df_prop.unstack().fillna(value=0).T\n",
+    "\n",
+    "fig,ax=plt.subplots(figsize=(5,3.7), dpi=200)\n",
+    "df_prop['slide'] =[item.split('_')[0] for item in df_prop.index]\n",
+    "ls_order_r = ls_order[::-1]\n",
+    "df_prop = df_prop.loc[ls_order_r]\n",
+    "df_prop.columns = [str(item) for item in df_prop.columns]\n",
+    "df_prop.plot(kind='barh',stacked=True,ax=ax,legend=True,cmap='tab20',width=0.9) #.rename(d_order).rename(d_clust_names,axis=1)\n",
+    "ax.legend(loc='upper left', bbox_to_anchor=(1.1,1.02),ncol=1, fancybox=True,title='Cluster Annotation')\n",
+    "ax.set_xlabel('Fraction of Cells')\n",
+    "ax.set_ylabel('Tissue')\n",
+    "ax.set_title('Cluster Composition: Biopsies Plus Controls')\n",
+    "plt.tight_layout()\n",
+    "fig.savefig(f'./{s_date}/StackedBar_{s_markers}markers_{k}Clusters_withcontrols_vert.pdf')\n",
+    "fig.savefig(f'./{s_date}/StackedBar_{s_markers}markers_{k}Clusters_withcontrols_vert.png')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#stacked bar horizontal\n",
+    "df['coreID'] = df.slide_scene.replace(d_order)\n",
+    "df['celltype'] = df.leiden.astype('str').replace(d_clust_names)\n",
+    "df_prop = (df.groupby([f'celltype','coreID']).PCNA.count())/(df.groupby(['coreID']).PCNA.count())\n",
+    "df_prop = df_prop.unstack().fillna(value=0).T\n",
+    "\n",
+    "fig,ax=plt.subplots(figsize=(10,2.5), dpi=200)\n",
+    "df_prop['slide'] =[item.split('_')[0] for item in df_prop.index]\n",
+    "#df_prop['scene'] =[item.split('_')[1] for item in df_prop.index]\n",
+    "df_prop = df_prop.loc[ls_order]\n",
+    "df_prop.columns = [str(item) for item in df_prop.columns]\n",
+    "df_prop.plot(kind='bar',stacked=True,ax=ax,legend=True,cmap='tab20',width=0.9) #.rename(d_order).rename(d_clust_names,axis=1)\n",
+    "ax.legend(loc='upper center', bbox_to_anchor=(1.5, 1.05),ncol=2, fancybox=True,title='Cluster Annotation')\n",
+    "ax.set_ylabel('Fraction of Cells')\n",
+    "ax.set_xlabel('Tissue')\n",
+    "ax.set_title('')\n",
+    "plt.tight_layout()\n",
+    "fig.savefig(f'./{s_date}/StackedBar_{s_markers}markers_{k}Clusters_withcontrols.pdf')\n",
+    "fig.savefig(f'./{s_date}/StackedBar_{s_markers}markers_{k}Clusters_withcontrols.png')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#plot all groups spatially  \n",
+    "from matplotlib.colors import ListedColormap, LinearSegmentedColormap\n",
+    "newcmap = ListedColormap(mpl.cm.tab20.colors)#ListedColormap(mpl.cm.tab20b.colors + mpl.cm.tab20c.colors)\n",
+    "from mplex_image import analyze\n",
+    "df_pos = analyze.celltype_to_bool(df_p,'leiden')\n",
+    "df_xy = df_mi.loc[df_pos.index]\n",
+    "ls_scene = ['SMTBx2-5_scene001', 'SMTBx3_scene004', 'SMTBx4-3_scene001', 'SMTBx4-3_scene002']\n",
+    "#ls_scene = ['JE-TMA-62_scene04', 'JE-TMA-43_scene04','JE-TMA-62_scene07','JE-TMA-43_scene07']\n",
+    "for s_slide in ls_scene:\n",
+    "    fig,ax = plt.subplots(figsize=(10,10),dpi=200) #10,10\n",
+    "    #plot negative cells\n",
+    "    df_scene = df_xy[df_xy.index.str.contains(s_slide)]\n",
+    "    ax.scatter(data=df_scene,x='DAPI_X',y='DAPI_Y',color='silver',s=0.1,label=f'')\n",
+    "    for idxs, s_color_int in enumerate(range(len(df_pos.columns))):\n",
+    "        s_color = str(s_color_int)\n",
+    "        if len(df_xy[(df_xy.slide_scene==s_slide) & (df_pos.loc[:,s_color])])>=1:\n",
+    "            #plot positive cells\n",
+    "            ax.scatter(data=df_xy[(df_xy.slide_scene==s_slide) & (df_pos.loc[:,s_color])],x='DAPI_X',y='DAPI_Y',\n",
+    "                                                                        label=f'{s_color}',s=0.1,color=newcmap.colors[idxs])\n",
+    "        #break\n",
+    "    ax.set_title(f\"{s_slide}\", fontsize=16)\n",
+    "    ax.axis('equal')\n",
+    "    ax.set_ylim(ax.get_ylim()[::-1])\n",
+    "    #ax.set_xticklabels('')\n",
+    "    #ax.set_yticklabels('')\n",
+    "    #break\n",
+    "    plt.legend(markerscale=10) \n",
+    "    fig.savefig(f'{codedir}/paper_data/GatingPlots/{s_slide}_clustering_scatterplot.png')\n",
+    "    #break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not os.path.exists(f'{s_sample}_{n_markers}markers_leiden{resolution}.csv'):\n",
+    "    print('saving csv')\n",
+    "    df.to_csv(f'{s_sample}_{n_markers}markers_leiden{resolution}.csv')\n",
+    "    df_prop.to_csv(f'{s_sample}_{n_markers}markers_leiden{resolution}_frac_pos.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f'{s_sample}_{n_markers}markers_leiden{resolution}.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f'{s_sample}_{n_markers}markers_leiden{resolution}_frac_pos.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python3.9.5",
+   "language": "python",
+   "name": "python3.9.5"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/mplex_image/20210312_visualize.py b/mplex_image/20210312_visualize.py
new file mode 100755
index 0000000..f9f86b9
--- /dev/null
+++ b/mplex_image/20210312_visualize.py
@@ -0,0 +1,288 @@
+####
+# title: visualize.py
+#
+# language: Python3.6
+# date: 2019-05-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to visualize cyclic data and analysis
+####
+
+#load libraries
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import os
+import skimage
+from skimage import io, segmentation
+import tifffile
+import copy
+import napari
+import seaborn as sns
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import scale
+
+#napari
+def load_crops(viewer,s_crop,s_tissue):
+    '''
+    add every channel of the s_tissue/s_crop ome.tif files in the working directory to the napari viewer,
+    plus the SegmentationBasins labels and their boundaries; returns the label image (empty array if none)
+    '''
+    ls_color = ['blue','green','yellow','red','cyan','magenta','gray','green','yellow','red','cyan','magenta','gray','gray','gray','gray','gray','gray','gray','gray']
+    print(s_crop)
+    label_image = np.array([]) #default, so the return value never depends on directory listing order
+    for s_file in os.listdir():
+        if s_file.find(s_tissue)>-1 and s_file.find(s_crop) > -1:
+            if s_file.find('ome.tif') > -1:
+                with tifffile.TiffFile(s_file) as tif:
+                    array = tif.asarray()
+                    omexml_string = tif.ome_metadata
+                    for idx in range(array.shape[0]):
+                        img = array[idx]
+                        s_tag = f'Channel ID="Channel:0:{idx}" Name="' #tag length grows with the digits of idx
+                        i_begin = omexml_string.find(s_tag)
+                        i_end = omexml_string[i_begin:].find('" SamplesPerPixel')
+                        s_marker = omexml_string[i_begin + len(s_tag):i_begin + i_end]
+                        #channels past the end of ls_color are shown in gray
+                        viewer.add_image(img,name=s_marker,rgb=False,visible=False,blending='additive',colormap=(ls_color[idx] if idx < len(ls_color) else 'gray'),contrast_limits = (np.quantile(img,0),(np.quantile(img,0.9999)+1)*1.5))
+            elif s_file.find('SegmentationBasins') > -1:
+                label_image = io.imread(s_file)
+                viewer.add_labels(label_image, name='cell_seg',blending='additive',visible=False)
+                cell_boundaries = segmentation.find_boundaries(label_image,mode='outer')
+                viewer.add_labels(cell_boundaries,blending='additive')
+    return(label_image)
+
+def pos_label(viewer,df_pos,label_image,s_cell):
+    '''
+    add a labels layer holding only the cells positive for s_cell to the viewer; returns that label image
+    df_pos = boolean dataframe (index labels end in cell<ID>), s_cell = marker column in df_pos
+    '''
+    def cell_ids(ls_index):
+        return([int(s_idx.split('_')[-1].split('cell')[1]) for s_idx in ls_index])
+    a_label = copy.deepcopy(label_image)
+    #first get rid of extra cells (filtered by DAPI, etc), then of cells not positive for s_cell
+    for li_keep in (cell_ids(df_pos.index), cell_ids(df_pos[df_pos.loc[:,s_cell]==True].index)):
+        a_label[~np.isin(a_label, li_keep)] = 0
+    viewer.add_labels(a_label, name=f'{s_cell.split("_")[0]}_seg',blending='additive',visible=False)
+    return(a_label)
+
+#jupyter notebook
+#load manual thresholds
+def new_thresh_csv(df_mi,d_combos):
+    '''empty threshold table: rows = 'global' + sorted slide_scenes of df_mi, one '' column per marker in d_combos'''
+    df_man = pd.DataFrame(index=['global', *sorted(set(df_mi.slide_scene))])
+    ls_marker = [s_marker for es_marker in d_combos.values() for s_marker in sorted(es_marker)]
+    for s_marker in ls_marker:
+        df_man[s_marker] = ''
+    return(df_man)
+
+def load_thresh_csv(s_sample):
+    '''
+    read thresh_JE_{s_sample}.csv; return per-scene {marker}_global and {marker}_local thresholds
+    multiplied by 256, with any threshold equal to 0 replaced by 12
+    '''
+    df_man = pd.read_csv(f'thresh_JE_{s_sample}.csv',header=0,index_col = 0)
+    ls_scene = df_man.index.tolist()
+    ls_scene.remove('global')
+    b_global = df_man.index=='global'
+    df_thresh = pd.DataFrame(index = ls_scene)
+    for s_marker in df_man.columns.tolist():
+        df_thresh[f'{s_marker}_global'] = df_man[b_global].loc['global',s_marker]*256
+        df_thresh[f'{s_marker}_local'] = df_man[~b_global].loc[:,s_marker]*256
+    return(df_thresh.replace(to_replace=0, value = 12))
+
+def threshold_postive(df_thresh,df_mi):
+    '''
+    make positive dataframe to check thresholds: start with local, and if it's not there, insert the global threshold
+    df_thresh = scenes x {marker}_local/{marker}_global thresholds; df_mi = cells x features, with slide_scene column
+    returns d_thresh_record ({scene}_{marker}: threshold used, NaN if none) and df_pos (cells x thresholded columns)
+    note, this will break if there are two biomarker locations
+    '''
+    ls_scene = sorted(df_thresh.index.tolist())
+    ls_sub = df_mi.columns[df_mi.dtypes=='float64'].tolist()
+    es_thresh_marker = {item.split('_')[0] for item in df_thresh.columns}
+    ls_other, ls_df_scene = [], []
+    d_thresh_record= {}
+    for s_scene in ls_scene:
+        ls_index = df_mi[df_mi.slide_scene==s_scene].index
+        df_scene = pd.DataFrame(index=ls_index)
+        for s_marker_loc in ls_sub:
+            s_marker = s_marker_loc.split('_')[0]
+            # only threshold markers in .csv
+            if s_marker not in es_thresh_marker:
+                ls_other.append(s_marker)
+                continue
+            #first check if local threshold exists, otherwise use global
+            i_thresh = df_thresh.loc[s_scene,f'{s_marker}_local']
+            if pd.isna(i_thresh):
+                i_thresh = df_thresh.loc[s_scene,f'{s_marker}_global']
+            if pd.isna(i_thresh):
+                ls_other.append(s_marker)
+                i_thresh = np.nan
+            else:
+                df_scene[s_marker_loc] = df_mi.loc[ls_index,s_marker_loc] >= i_thresh
+            d_thresh_record.update({f'{s_scene}_{s_marker}':i_thresh})
+        ls_df_scene.append(df_scene)
+    df_pos = pd.concat(ls_df_scene) if len(ls_df_scene) > 0 else pd.DataFrame() #one concat; DataFrame.append is gone in pandas 2
+    print(f'Did not threshold {set(ls_other)}')
+    return(d_thresh_record,df_pos)
+
+def plot_positive(s_type,d_combos,df_pos,d_thresh_record,df_xy,b_save=True):
+    '''
+    per scene in df_xy, one scatter panel per df_pos column whose marker is in d_combos[s_type]:
+    all cells (silver) under the threshold-positive cells (DarkBlue), titled with the threshold used
+    returns the list of figures; if b_save, each is also written to ./SpatialPlots/
+    '''
+    es_marker = set(d_combos[s_type])
+    ls_color = [item for item in df_pos.columns if item.split('_')[0] in es_marker]
+    ls_fig = []
+    for s_scene in sorted(set(df_xy.slide_scene)):
+        #negative cells = all cells even before dapi filtering
+        df_neg = df_xy[(df_xy.slide_scene==s_scene)]
+        fig, ax = plt.subplots(2, max(1,((len(ls_color))+1)//2), figsize=(18,12)) #at least one column, even with no markers
+        ax = ax.ravel()
+        for ax_num, s_color in enumerate(ls_color):
+            s_marker = s_color.split('_')[0]
+            s_min = d_thresh_record.get(f"{s_scene}_{s_marker}",np.nan)
+            #positive cells = positive cells based on threshold (==True so that NaN counts as negative)
+            ls_pos_index = df_pos.index[df_pos.loc[:,s_color]==True]
+            df_color_pos = df_neg[df_neg.index.isin(ls_pos_index)]
+            if len(df_color_pos)>=1:
+                #plot negative cells, then positive cells on top
+                ax[ax_num].scatter(data=df_neg,x='DAPI_X',y='DAPI_Y',color='silver',s=1)
+                ax[ax_num].scatter(data=df_color_pos, x='DAPI_X',y='DAPI_Y',color='DarkBlue',s=.5)
+                ax[ax_num].axis('equal')
+                ax[ax_num].set_ylim(ax[ax_num].get_ylim()[::-1])
+                ax[ax_num].set_title(f'{s_marker} min={int(s_min)} ({len(df_color_pos)} cells)')
+            else:
+                ax[ax_num].set_title(f'{s_marker} min={(s_min)} ({(0)} cells)')
+        fig.suptitle(s_scene)
+        ls_fig.append(fig)
+        if b_save:
+            fig.savefig(f'./SpatialPlots/{s_scene}_{s_type}_manual.png')
+    return(ls_fig)
+
+#gating analysis
+def prop_positive(df_data,s_cell,s_grouper):
+    '''fraction of cells with each s_cell value within each s_grouper group (rows = groups); rows with NaN in the used columns are dropped'''
+    ls_col = [s_cell,s_grouper] + [s_col for s_col in ['countme'] if s_col in df_data.columns]
+    df_cell = df_data.loc[:,ls_col].dropna().assign(countme=True) #counter column is built here, callers need not supply it
+    return((df_cell.groupby([s_cell,s_grouper]).countme.count()/df_cell.groupby([s_grouper]).countme.count()).unstack().T)
+
+def prop_clustermap(df_prop,df_annot,i_thresh,lut,figsize=(10,5)):
+    '''
+    clustermap of df_prop (NaN filled with 0) with row colors lut[df_annot.loc[row,'ID']]
+    columns whose mean over rows is <= i_thresh are dropped when i_thresh > 0; otherwise all are kept
+    returns the object returned by sns.clustermap and the plotted dataframe
+    '''
+    #look up the annotation without writing a temporary ID column into the caller's df_prop
+    species = pd.Series([df_annot.loc[s_index,'ID'] for s_index in df_prop.index],index=df_prop.index,name='ID')
+    row_colors = species.map(lut)
+    df_plot_less = df_prop.drop(columns='ID',errors='ignore').fillna(0)
+    if i_thresh > 0:
+        #clustermap plot without the low values
+        df_plot_less = df_plot_less.loc[:,df_plot_less.sum()/len(df_plot_less) > i_thresh]
+    g = sns.clustermap(df_plot_less,figsize=figsize,cmap='viridis',row_colors=row_colors)
+    return(g,df_plot_less)
+
+def prop_barplot(df_plot_less,s_cell,colormap="Spectral",figsize=(10,5),b_sort=True):
+    '''
+    stacked horizontal bar plot of df_plot_less (one bar per row, one segment per column), titled s_cell
+    b_sort = sort rows by index, descending, before plotting; returns the figure
+    '''
+    fig,ax = plt.subplots(figsize=figsize)
+    df_bar = df_plot_less.sort_index(ascending=False) if b_sort else df_plot_less
+    df_bar.plot(kind='barh',stacked=True,width=.9, ax=ax,colormap=colormap)
+    ax.set(title=s_cell, xlabel='Fraction Positive')
+    ax.legend(bbox_to_anchor=(1.01, 1))
+    plt.tight_layout()
+    return(fig)
+
+def plot_color_leg(lut,figsize = (2.3,3)):
+    #colors
+    series = pd.Series(lut)
+    df_color = pd.DataFrame(index=range(len(series)),columns=['subtype','color'])
+
+    series.sort_values()
+    df_color['subtype'] = series.index
+    df_color['value'] = 1
+    df_color['color'] = series.values
+
+    fig,ax = plt.subplots(figsize = figsize,dpi=100)
+    df_color.plot(kind='barh',x='subtype',y='value',width=1,legend=False,color=df_color.color,ax=ax)
+    ax.set_xticks([])
+    ax.set_ylabel('')
+    ax.set_title(f'subtype')
+    plt.tight_layout()
+    return(fig)
+
+#cluster analysis
+
+def cluster_kmeans(df_mi,ls_columns,k,b_sil=False):
+    '''
+    log2 transform, zscore and kmens cluster
+    '''
+    df_cluster_norm = df_mi.loc[:,ls_columns]
+    df_cluster_norm_one = df_cluster_norm + 1
+    df_cluster = np.log2(df_cluster_norm_one)
+
+    #select figure size
+    i_len = k
+    i_width = len(df_cluster.columns)
+
+    #scale date
+    df_scale = scale(df_cluster)
+
+    #kmeans cluster
+    kmeans = KMeans(n_clusters=k, random_state=0).fit(df_scale)
+    df_cluster.columns = [item.split('_')[0] for item in df_cluster.columns]
+    df_cluster[f'K{k}'] = list(kmeans.labels_)
+    g = sns.clustermap(df_cluster.groupby(f'K{k}').mean(),cmap="RdYlGn_r",z_score=1,figsize=(3+i_width/3,3+i_len/3))
+    if b_sil:
+        score = silhouette_score(X = df_scale, labels=list(kmeans.labels_))
+    else:
+        score = np.nan
+    return(g,df_cluster,score)
+
+def plot_clusters(df_cluster,df_xy,s_num='many'):
+    s_type = df_cluster.columns[df_cluster.dtypes=='int64'][0]
+    print(s_type)
+    ls_scene = sorted(set(df_cluster.slide_scene))
+    ls_color = sorted(set(df_cluster.loc[:,s_type].dropna()))
+    d_fig = {}
+    for s_scene in ls_scene:
+        #negative cells = all cells even before dapi filtering
+        df_neg = df_xy[(df_xy.slide_scene==s_scene)]
+        #plot
+        if s_num == 'many':
+            fig, ax = plt.subplots(3, ((len(ls_color))+2)//3, figsize=(18,12),dpi=200)
+        else:
+            fig, ax = plt.subplots(2, 1, figsize=(7,4),dpi=200)	
+        ax = ax.ravel()
+        for ax_num, s_color in enumerate(ls_color):
+            s_marker = s_color
+            #positive cells = poitive cells based on threshold
+            ls_pos_index = (df_cluster[df_cluster.loc[:,s_type]==s_color]).index
+            df_color_pos = df_neg[df_neg.index.isin(ls_pos_index)]
+            if len(df_color_pos)>=1:
+                #plot negative cells
+                ax[ax_num].scatter(data=df_neg,x='DAPI_X',y='DAPI_Y',color='silver',s=1)
+                #plot positive cells
+                ax[ax_num].scatter(data=df_color_pos, x='DAPI_X',y='DAPI_Y',color='DarkBlue',s=.5)
+                  
+                ax[ax_num].axis('equal')
+                ax[ax_num].set_ylim(ax[ax_num].get_ylim()[::-1])
+                if s_num == 'many':
+                    ax[ax_num].set_xticklabels('')
+                    ax[ax_num].set_yticklabels('')
+                ax[ax_num].set_title(f'{s_color} ({len(df_color_pos)} cells)')
+            else:
+                ax[ax_num].set_xticklabels('')
+                ax[ax_num].set_yticklabels('')
+                ax[ax_num].set_title(f'{s_color}  ({(0)} cells')
+        
+        fig.suptitle(s_scene)
+        d_fig.update({s_scene:fig})
+    return(d_fig)
diff --git a/mplex_image/__init__.py b/mplex_image/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/mplex_image/__pycache__/__init__.cpython-37.pyc b/mplex_image/__pycache__/__init__.cpython-37.pyc
new file mode 100755
index 0000000..e9e21ea
Binary files /dev/null and b/mplex_image/__pycache__/__init__.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/__init__.cpython-38.pyc b/mplex_image/__pycache__/__init__.cpython-38.pyc
new file mode 100755
index 0000000..95b1ebc
Binary files /dev/null and b/mplex_image/__pycache__/__init__.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/__init__.cpython-39.pyc b/mplex_image/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..b8859ba
Binary files /dev/null and b/mplex_image/__pycache__/__init__.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/analyze.cpython-37.pyc b/mplex_image/__pycache__/analyze.cpython-37.pyc
new file mode 100755
index 0000000..2c8fcb1
Binary files /dev/null and b/mplex_image/__pycache__/analyze.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/analyze.cpython-38.pyc b/mplex_image/__pycache__/analyze.cpython-38.pyc
new file mode 100755
index 0000000..ff95f60
Binary files /dev/null and b/mplex_image/__pycache__/analyze.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/analyze.cpython-39.pyc b/mplex_image/__pycache__/analyze.cpython-39.pyc
new file mode 100644
index 0000000..842d212
Binary files /dev/null and b/mplex_image/__pycache__/analyze.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/cmif.cpython-37.pyc b/mplex_image/__pycache__/cmif.cpython-37.pyc
new file mode 100755
index 0000000..5e4ca2b
Binary files /dev/null and b/mplex_image/__pycache__/cmif.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/cmif.cpython-38.pyc b/mplex_image/__pycache__/cmif.cpython-38.pyc
new file mode 100755
index 0000000..571f31b
Binary files /dev/null and b/mplex_image/__pycache__/cmif.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/cmif.cpython-39.pyc b/mplex_image/__pycache__/cmif.cpython-39.pyc
new file mode 100755
index 0000000..3742d85
Binary files /dev/null and b/mplex_image/__pycache__/cmif.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/codex.cpython-37.pyc b/mplex_image/__pycache__/codex.cpython-37.pyc
new file mode 100755
index 0000000..6438d19
Binary files /dev/null and b/mplex_image/__pycache__/codex.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/codex.cpython-38.pyc b/mplex_image/__pycache__/codex.cpython-38.pyc
new file mode 100755
index 0000000..0010b93
Binary files /dev/null and b/mplex_image/__pycache__/codex.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/features.cpython-37.pyc b/mplex_image/__pycache__/features.cpython-37.pyc
new file mode 100755
index 0000000..c9df747
Binary files /dev/null and b/mplex_image/__pycache__/features.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/features.cpython-38.pyc b/mplex_image/__pycache__/features.cpython-38.pyc
new file mode 100755
index 0000000..c869dfe
Binary files /dev/null and b/mplex_image/__pycache__/features.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/features.cpython-39.pyc b/mplex_image/__pycache__/features.cpython-39.pyc
new file mode 100755
index 0000000..ed790ee
Binary files /dev/null and b/mplex_image/__pycache__/features.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/gating.cpython-38.pyc b/mplex_image/__pycache__/gating.cpython-38.pyc
new file mode 100755
index 0000000..93c662f
Binary files /dev/null and b/mplex_image/__pycache__/gating.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/gating.cpython-39.pyc b/mplex_image/__pycache__/gating.cpython-39.pyc
new file mode 100644
index 0000000..88ac253
Binary files /dev/null and b/mplex_image/__pycache__/gating.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/getdata.cpython-37.pyc b/mplex_image/__pycache__/getdata.cpython-37.pyc
new file mode 100755
index 0000000..59ac9ef
Binary files /dev/null and b/mplex_image/__pycache__/getdata.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/getdata.cpython-38.pyc b/mplex_image/__pycache__/getdata.cpython-38.pyc
new file mode 100755
index 0000000..83ae205
Binary files /dev/null and b/mplex_image/__pycache__/getdata.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/getdata.cpython-39.pyc b/mplex_image/__pycache__/getdata.cpython-39.pyc
new file mode 100755
index 0000000..d77f944
Binary files /dev/null and b/mplex_image/__pycache__/getdata.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/imagine.cpython-37.pyc b/mplex_image/__pycache__/imagine.cpython-37.pyc
new file mode 100755
index 0000000..306fa6b
Binary files /dev/null and b/mplex_image/__pycache__/imagine.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/imagine.cpython-38.pyc b/mplex_image/__pycache__/imagine.cpython-38.pyc
new file mode 100755
index 0000000..49741f4
Binary files /dev/null and b/mplex_image/__pycache__/imagine.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/metadata.cpython-37.pyc b/mplex_image/__pycache__/metadata.cpython-37.pyc
new file mode 100755
index 0000000..ec53895
Binary files /dev/null and b/mplex_image/__pycache__/metadata.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/metadata.cpython-38.pyc b/mplex_image/__pycache__/metadata.cpython-38.pyc
new file mode 100755
index 0000000..862b8f4
Binary files /dev/null and b/mplex_image/__pycache__/metadata.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/mics.cpython-38.pyc b/mplex_image/__pycache__/mics.cpython-38.pyc
new file mode 100755
index 0000000..2b21d7a
Binary files /dev/null and b/mplex_image/__pycache__/mics.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/mics.cpython-39.pyc b/mplex_image/__pycache__/mics.cpython-39.pyc
new file mode 100755
index 0000000..68abfea
Binary files /dev/null and b/mplex_image/__pycache__/mics.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/mpimage.cpython-37.pyc b/mplex_image/__pycache__/mpimage.cpython-37.pyc
new file mode 100755
index 0000000..7694f6a
Binary files /dev/null and b/mplex_image/__pycache__/mpimage.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/mpimage.cpython-38.pyc b/mplex_image/__pycache__/mpimage.cpython-38.pyc
new file mode 100755
index 0000000..25b868a
Binary files /dev/null and b/mplex_image/__pycache__/mpimage.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/mpimage.cpython-39.pyc b/mplex_image/__pycache__/mpimage.cpython-39.pyc
new file mode 100755
index 0000000..93be7a7
Binary files /dev/null and b/mplex_image/__pycache__/mpimage.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/normalize.cpython-38.pyc b/mplex_image/__pycache__/normalize.cpython-38.pyc
new file mode 100755
index 0000000..432c2cd
Binary files /dev/null and b/mplex_image/__pycache__/normalize.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/normalize.cpython-39.pyc b/mplex_image/__pycache__/normalize.cpython-39.pyc
new file mode 100755
index 0000000..376a0fc
Binary files /dev/null and b/mplex_image/__pycache__/normalize.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/ometiff.cpython-37.pyc b/mplex_image/__pycache__/ometiff.cpython-37.pyc
new file mode 100755
index 0000000..575debe
Binary files /dev/null and b/mplex_image/__pycache__/ometiff.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/ometiff.cpython-38.pyc b/mplex_image/__pycache__/ometiff.cpython-38.pyc
new file mode 100755
index 0000000..b3dbb77
Binary files /dev/null and b/mplex_image/__pycache__/ometiff.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/ometiff.cpython-39.pyc b/mplex_image/__pycache__/ometiff.cpython-39.pyc
new file mode 100755
index 0000000..789526e
Binary files /dev/null and b/mplex_image/__pycache__/ometiff.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/preprocess.cpython-37.pyc b/mplex_image/__pycache__/preprocess.cpython-37.pyc
new file mode 100755
index 0000000..61224ba
Binary files /dev/null and b/mplex_image/__pycache__/preprocess.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/preprocess.cpython-38.pyc b/mplex_image/__pycache__/preprocess.cpython-38.pyc
new file mode 100755
index 0000000..14db79b
Binary files /dev/null and b/mplex_image/__pycache__/preprocess.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/preprocess.cpython-39.pyc b/mplex_image/__pycache__/preprocess.cpython-39.pyc
new file mode 100755
index 0000000..a629aca
Binary files /dev/null and b/mplex_image/__pycache__/preprocess.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/process.cpython-37.pyc b/mplex_image/__pycache__/process.cpython-37.pyc
new file mode 100755
index 0000000..a2ab185
Binary files /dev/null and b/mplex_image/__pycache__/process.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/process.cpython-38.pyc b/mplex_image/__pycache__/process.cpython-38.pyc
new file mode 100755
index 0000000..18d3893
Binary files /dev/null and b/mplex_image/__pycache__/process.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/process.cpython-39.pyc b/mplex_image/__pycache__/process.cpython-39.pyc
new file mode 100755
index 0000000..5a6c4e6
Binary files /dev/null and b/mplex_image/__pycache__/process.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/register.cpython-37.pyc b/mplex_image/__pycache__/register.cpython-37.pyc
new file mode 100755
index 0000000..6b120c4
Binary files /dev/null and b/mplex_image/__pycache__/register.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/register.cpython-38.pyc b/mplex_image/__pycache__/register.cpython-38.pyc
new file mode 100755
index 0000000..1590041
Binary files /dev/null and b/mplex_image/__pycache__/register.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/register.cpython-39.pyc b/mplex_image/__pycache__/register.cpython-39.pyc
new file mode 100755
index 0000000..d5a71c0
Binary files /dev/null and b/mplex_image/__pycache__/register.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/segment.cpython-37.pyc b/mplex_image/__pycache__/segment.cpython-37.pyc
new file mode 100755
index 0000000..3204988
Binary files /dev/null and b/mplex_image/__pycache__/segment.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/segment.cpython-38.pyc b/mplex_image/__pycache__/segment.cpython-38.pyc
new file mode 100755
index 0000000..d6e2cbc
Binary files /dev/null and b/mplex_image/__pycache__/segment.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/segment.cpython-39.pyc b/mplex_image/__pycache__/segment.cpython-39.pyc
new file mode 100755
index 0000000..9015372
Binary files /dev/null and b/mplex_image/__pycache__/segment.cpython-39.pyc differ
diff --git a/mplex_image/__pycache__/visualize.cpython-37.pyc b/mplex_image/__pycache__/visualize.cpython-37.pyc
new file mode 100755
index 0000000..77489bc
Binary files /dev/null and b/mplex_image/__pycache__/visualize.cpython-37.pyc differ
diff --git a/mplex_image/__pycache__/visualize.cpython-38.pyc b/mplex_image/__pycache__/visualize.cpython-38.pyc
new file mode 100755
index 0000000..4f6e116
Binary files /dev/null and b/mplex_image/__pycache__/visualize.cpython-38.pyc differ
diff --git a/mplex_image/__pycache__/visualize.cpython-39.pyc b/mplex_image/__pycache__/visualize.cpython-39.pyc
new file mode 100755
index 0000000..d1843c1
Binary files /dev/null and b/mplex_image/__pycache__/visualize.cpython-39.pyc differ
diff --git a/mplex_image/_version.py b/mplex_image/_version.py
new file mode 100755
index 0000000..6526deb
--- /dev/null
+++ b/mplex_image/_version.py
@@ -0,0 +1 @@
+__version__ = "0.0.7"
diff --git a/mplex_image/analyze.py b/mplex_image/analyze.py
new file mode 100755
index 0000000..2887c41
--- /dev/null
+++ b/mplex_image/analyze.py
@@ -0,0 +1,300 @@
+####
+# title: analyze.py
+#
+# language: Python3.6
+# date: 2019-05-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to analyze cyclic data and images after manual thresholding
+####
+
+#load libraries
+import matplotlib as mpl
+mpl.use('agg')
+import pandas as pd
+import numpy as np
+import os
+import skimage
+from skimage import io
+import json
+from biotransistor import imagine
+import itertools
+
+#functions
+# import importlib
+# importlib.reload(analyze)
+
+def combinations(df_tn_tumor,ls_marker=['CK19_Ring','CK7_Ring','CK5_Ring','CK14_Ring','CD44_Ring','Vim_Ring']):
+    '''
+    get all combinations of the markers (can be overlapping)
+    '''
+    ls_combos = []
+    for i in range(0,len(ls_marker)):
+        for tu_combo in itertools.combinations(ls_marker,i+1):#'Ecad_Ring',
+            ls_combos.append(tu_combo)
+
+    #create the combos dataframe dataframe
+    df_tn_counts = pd.DataFrame(index=df_tn_tumor.index) 
+    se_all = set(ls_marker)
+
+    #combos of 2 or more
+    for tu_combo in ls_combos:
+        print(tu_combo)
+        se_pos = df_tn_tumor[(df_tn_tumor.loc[:,tu_combo].sum(axis=1) ==len(tu_combo))] #those are pos
+        se_neg = df_tn_tumor[(df_tn_tumor.loc[:,(se_all)].sum(axis=1) == len(tu_combo))] #and only those
+        df_tn_counts['_'.join([item for item in tu_combo])] = df_tn_tumor.index.isin(se_pos.index.intersection(se_neg.index))
+    
+    #other cells (negative for all)
+    df_tn_counts['__'] = df_tn_counts.loc[:,df_tn_counts.dtypes=='bool'].sum(axis=1)==0
+    if sum(df_tn_counts.sum(axis=1)!=1) !=0:
+        print('error in analyze.combinations')
+
+    return(df_tn_counts)
+
+def gated_combinations(df_data,ls_gate,ls_marker):
+    '''
+    df_data = boolean cell type dataframe
+    ls_gate = combine each of these cell types (full coverage and non-overlapping)
+    ls_marker = with these cell tpyes (full coverage and non-overlapping)
+    '''
+    es_all = set(ls_marker + ls_gate)
+    ls_old = df_data.columns
+    df_gate_counts = pd.DataFrame()
+    for s_gate in ls_gate:
+        df_tn_tumor = df_data[df_data.loc[:,s_gate]]
+        print(f'{s_gate} {len(df_tn_tumor)}')
+        #combos of 2
+        if len(df_tn_tumor) >=1:
+            for s_marker in ls_marker:
+                print(s_marker)
+                tu_combo = (s_gate,s_marker)
+                es_neg = es_all - set(tu_combo)
+                if ~df_data.loc[:,tu_combo].all(axis=1).any():
+                    df_gate_counts[f"{s_gate}_{s_marker}"] = False
+                else:
+                    df_gate_counts[f"{s_gate}_{s_marker}"] = df_data.loc[:,tu_combo].all(axis=1) & ~df_data.loc[:,es_neg].any(axis=1)
+    df_gate_counts.fillna(value=False, inplace=True)
+    return(df_gate_counts) 
+
+def add_celltype(df_data, ls_cell_names, s_type_name):
+    '''
+    add gated cell type to data frame, and save the possible cell typesand cell type name in a csv
+    df_data = data frame with the cell types (boolean)
+    ls_cell_names = list of the cell names
+    s_type_name = the cell category
+    '''
+    #check cell types' exclusivity
+    if ((df_data.loc[:,ls_cell_names].sum(axis=1)>1)).sum()!=0:
+        print(f'Error in exclusive cell types: {s_type_name}')
+
+    #make cell type object columns
+    for s_marker in ls_cell_names:
+        df_data.loc[(df_data[df_data.loc[:,s_marker]]).index,s_type_name] = s_marker
+    d_record = {s_type_name:ls_cell_names}
+
+    #append the record json
+    if not os.path.exists('./Gating_Record.json'):
+        with open(f'Gating_Record.json','w') as f: 
+            json.dump(d_record, f, indent=4, sort_keys=True)
+    else:
+        with open('Gating_Record.json','r') as f:
+            d_current = json.load(f)
+        d_current.update(d_record)
+        with open(f'Gating_Record.json','w') as f: 
+            json.dump(d_current, f, indent=4, sort_keys=True)
+
+def thresh_meanint(df_thresh,d_crop={},s_thresh='minimum',):
+    """
+    threshold, and output positive and negative mean intensity and array
+    df_thresh = dataframe of images with columns having image attributes
+        and index with image names, column with threshold values
+    d_crop = image scene and crop coordinates
+
+    """
+    d_mask = {}
+    for idx, s_index in enumerate(df_thresh.index):
+        #load image, crop, thresh
+        a_image = skimage.io.imread(s_index)
+        if len(d_crop) != 0:
+            tu_crop = d_crop[df_thresh.loc[s_index,'scene']]
+            a_image = a_image[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        i_min = df_thresh.loc[s_index,s_thresh]
+        a_mask = a_image > i_min
+        print(f'mean positive intensity = {np.mean(a_image[a_mask])}')
+        df_thresh.loc[s_index,'meanpos'] = np.mean(a_image[a_mask])
+        b_mask = a_image < i_min
+        print(f'mean negative intensity = {np.mean(a_image[b_mask])}')
+        df_thresh.loc[s_index,'meanneg'] = np.mean(a_image[b_mask])
+        d_mask.update({s_index:a_mask})
+    return(df_thresh,d_mask)
+
+def mask_meanint(df_img, a_mask):
+    '''
+    for each image in dataframe of image (df_img)
+    calculate mean intensity in pixels in mask (a_mask)
+    '''
+
+    #for each image, calculate mean intensity in the masked area
+    for s_index in df_img.index:
+        a_img = skimage.io.imread(s_index)
+        a_img_total = a_img[a_mask]
+        i_img_meanint = a_img_total.sum()/a_img_total.size
+        df_img.loc[s_index,'result'] = i_img_meanint
+    return(df_img)
+
+def make_border(s_sample,df_pos,ls_color,segmentdir,savedir,b_images=True,s_find = 'Cell Segmentation Basins.tif',s_split='Scene '): 
+    """
+    load positive cells dataframe, and segmentation basins
+    output the borders od positive cells and the cells touching dictionary
+    """
+    #load segmentation basins 
+    #flattens ids into a set (stored in d_flatten)
+    os.chdir(segmentdir)
+    ls_file = os.listdir()
+    ls_cellseg = []
+
+    # list of Basin files
+    for s_file in ls_file:
+        if s_file.find(s_find)>-1:
+            if s_file.find(s_sample)>-1:
+                ls_cellseg.append(s_file)
+
+    d_flatten = {}
+    dd_touch = {}
+
+    for s_file in ls_cellseg:
+        s_scene_num = s_file.split(s_split)[1].split('_')[0].split(' ')[0]
+        print(s_file)
+        print(s_scene_num)
+        a_img = io.imread(s_file)
+        # get all cell ids that exist in the images
+        es_cell = set(a_img.flatten())
+        es_cell.remove(0)
+        s_scene = f'scene{s_scene_num}'
+        d_flatten.update({f'scene{s_scene_num}':es_cell})
+
+        #get a cell touching dictionary (only do this one (faster))
+        dd_touch.update({f'{s_sample}_{s_scene}':imagine.touching_cells(a_img, i_border_width=0)})
+
+        #s_type = 'Manual' 
+        if b_images:
+            #save png of cell borders (single tiffs)
+            for idx, s_color in enumerate(ls_color):
+                print(f'Processing {s_color}')
+                #positive cells = positive cells based on thresholds
+                #dataframe of all the positive cells
+                df_color_pos = df_pos[df_pos.loc[:,s_color]]
+                ls_index = df_color_pos.index.tolist()
+ 
+                if len(df_color_pos[(df_color_pos.scene==s_scene)])>=1:
+                    ls_index = df_color_pos[(df_color_pos.scene==s_scene)].index.tolist()
+                    es_cell_positive = set([int(s_index.split('cell')[-1]) for s_index in ls_index])
+
+                    # erase all non positive basins
+                    es_cell_negative = d_flatten[s_scene].difference(es_cell_positive)
+                    a_pos = np.copy(a_img)
+                    a_pos[np.isin(a_img, list(es_cell_negative))] = 0   # bue: this have to be a list, else it will not work!
+
+                    # get cell border (a_pos_border)
+                    a_pos_border = imagine.get_border(a_pos)  # border has value 1
+                    a_pos_border = np.uint16(a_pos_border * 65000)  # border will have value 255
+                    #filename hack
+                    print('saving image')
+                    io.imsave(f'{savedir}/Registered-R{idx+100}_{s_color.replace("_",".")}.border.border.border_{df_color_pos.index[0].split("_")[0]}-{s_scene.replace("scene","Scene-")}_c2_ORG.tif',a_pos_border)
+                else:
+                    print(len(df_color_pos[(df_color_pos.scene==s_scene)]))
+    #from elmar (reformat cells touching dictionary and save
+
+    ddes_image = {}
+    for s_image, dei_image in dd_touch.items():
+        des_cell = {}
+        for i_cell, ei_touch in dei_image.items():
+            des_cell.update({str(i_cell): [str(i_touch) for i_touch in sorted(ei_touch)]})
+        ddes_image.update({s_image:des_cell})
+
+    #save dd_touch as json file
+    with open(f'result_{s_sample}_cellstouching_dictionary.json','w') as f: 
+        json.dump(ddes_image, f)
+    return(ddes_image)
+
+def make_border_all(s_sample,df_pos,segmentdir,savedir,b_images=True):
+    """
+    load positive cells dataframe, and segmentation basins
+    output the borders od positive cells and the cells touching dictionary
+    """
+    #Specify which images to save
+    #ls_color = df_pos.columns.tolist()
+    #ls_color.remove('DAPI_X')
+    #ls_color.remove('DAPI_Y')
+    #ls_color.remove('scene')
+
+    #load segmentation basins 
+    #flattens ids into a set (stored in d_flatten)
+    os.chdir(segmentdir)
+    ls_file = os.listdir()
+    ls_cellseg = []
+    d_files = {}
+    #dictionary of file to scene ID , and a list of Basin files
+    for s_file in ls_file:
+        if s_file.find('Cell Segmentation Basins.tif')>-1:
+            if s_file.find(s_sample)>-1:
+                ls_cellseg.append(s_file)
+                s_scene_num = s_file.split(' ')[1]
+                d_files.update({f'scene{s_scene_num}':s_file})
+
+    d_flatten = {}
+    dd_touch = {}
+
+    for s_file in ls_cellseg:
+        s_scene_num = s_file.split(' ')[1]
+        print(s_file)
+        a_img = skimage.io.imread(s_file)
+        # get all cell ids that exist in the images
+        es_cell = set(a_img.flatten())
+        es_cell.remove(0)
+        s_scene = f'scene{s_scene_num}'
+        d_flatten.update({f'scene{s_scene_num}':es_cell})
+
+        #get a cell touching dictionary (only do this one (faster))
+        dd_touch.update({f'{s_sample}_{s_scene}':imagine.touching_cells(a_img, i_border_width=0)})
+
+        #s_type = 'Manual' 
+        if b_images:
+                idx=0
+            #save png of all cell borders (single tiffs)
+            #for idx, s_color in enumerate(ls_color):
+            #    print(f'Processing {s_color}')
+                #positive cells = positive cells based on thresholds
+                #dataframe of all the positive cells
+                df_color_pos = df_pos #[df_pos.loc[:,s_color]]
+                ls_index = df_color_pos.index.tolist()
+ 
+                if len(df_color_pos[(df_color_pos.scene==s_scene)])>=1:
+                    ls_index = df_color_pos[(df_color_pos.scene==s_scene)].index.tolist()
+                    es_cell_positive = set([int(s_index.split('cell')[-1]) for s_index in ls_index])
+
+                    # erase all non positive basins
+                    es_cell_negative = d_flatten[s_scene].difference(es_cell_positive)
+                    a_pos = np.copy(a_img)
+                    a_pos[np.isin(a_img, list(es_cell_negative))] = 0   # bue: this have to be a list, else it will not work!
+
+                    # get cell border (a_pos_border)
+                    a_pos_border = imagine.get_border(a_pos)  # border has value 1
+                    a_pos_border = a_pos_border.astype(np.uint8)
+                    a_pos_border = a_pos_border * 255  # border will have value 255
+                    #filename hack 2019-11-27
+                    skimage.io.imsave(f'{savedir}/R{idx+100}_all.all_{df_color_pos.index[0].split("_")[0]}-{s_scene.replace("scene","Scene-")}_border_c3_ORG.tif',a_pos_border)
+
+def celltype_to_bool(df_data, s_column):
+    """
+    Input a dataframe and column name of cell tpyes
+    Output a new boolean dataframe with each col as a cell type
+    """
+    df_bool = pd.DataFrame(index=df_data.index)
+    for celltype in sorted(set(df_data.loc[:,s_column])):
+        df_bool.loc[df_data[df_data.loc[:,s_column]==celltype].index,celltype] = True
+    df_bool = df_bool.fillna(value=False)
+    df_data.columns = [str(item) for item in df_data.columns]
+    return(df_bool)
\ No newline at end of file
diff --git a/mplex_image/cmif.py b/mplex_image/cmif.py
new file mode 100755
index 0000000..62367dc
--- /dev/null
+++ b/mplex_image/cmif.py
@@ -0,0 +1,705 @@
+# wrapper functions for cmIF image processing
+
+from mplex_image import preprocess, mpimage, getdata, process, features, register, ometiff
+import copy
+import time
+import os
+import numpy as np
+import shutil
+import subprocess
+import pandas as pd
+import math
+from itertools import compress
+import skimage
+import sys
+import re
+from skimage import io
+from skimage.util import img_as_uint
+import tifffile
+
+#set src path (CHANGE ME)
+# s_src_path: folder that must contain src/wrapper.sh (copied by run_registration_matlab)
+# s_work_path: folder that must contain exacloud/JE<n> job folders (entered by segmentation_inputs)
+s_src_path = '/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF'
+s_work_path = '/home/groups/graylab_share/Chin_Lab/ChinData/Work/engje'
+
+
+def parse_czi(czidir,type='r',b_scenes=True):
+    """
+    parse .czi's written in koei's naming convention
+    type = 's' for stitched
+    """
+    cwd = os.getcwd()
+    #go to directory
+    os.chdir(czidir)
+    df_img = mpimage.filename_dataframe(s_end = ".czi",s_start='R',s_split='_')
+    df_img['slide'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+    if type=='s':
+        df_img['slide'] = [item[5] for item in [item.split('_') for item in df_img.index]]
+    df_img['rounds'] = [item[0] for item in [item.split('_') for item in df_img.index]]
+    df_img['markers'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+    if b_scenes:
+        try:
+            df_img['scene'] = [item[1].split('.')[0] for item in [item.split('Scene-') for item in df_img.index]]
+        except IndexError:
+            print(f"{set([item[0] for item in [item.split('Scene-') for item in df_img.index]])}")
+        df_img['scanID'] = [item[-1].split('-Scene')[0] for item in [item.split('__') for item in df_img.index]]
+    os.chdir(cwd)
+    return(df_img)
+
+def parse_stitched_czi(czidir,s_slide,b_scenes=True):
+    '''
+    parse .czi's wtitten in koei's naming convention, with periods changed to undescores
+    '''
+    cwd = os.getcwd()
+    #go to directory
+    os.chdir(czidir)
+    df_img = mpimage.filename_dataframe(s_end = ".czi",s_start='R',s_split='_').rename({'data':'rounds'},axis=1)
+    df_img['markers'] = [item[0] for item in [item.split(f'_{s_slide}') for item in df_img.index]]
+    for s_index in df_img.index:
+        df_img.loc[s_index,'markers_un'] = df_img.loc[s_index,'markers'].split(f"{df_img.loc[s_index,'rounds']}_")[1]
+    df_img['markers'] = df_img.markers_un.str.replace('_','.')
+    df_img.slide = s_slide
+    if b_scenes:
+        df_img['scene'] = [item[1].split('-')[0] for item in [item.split('Scene-') for item in df_img.index]]
+    os.chdir(cwd)
+    return(df_img)
+
+def count_images(df_img):
+    """
+    count and list slides, scenes, rounds
+    """
+    for s_sample in sorted(set(df_img.slide)):
+        print(s_sample)
+        df_img_slide = df_img[df_img.slide==s_sample]
+        print('scene names')
+        [print(f'{item}: {sum(df_img_slide.scene==item)}') for item in sorted(set(df_img_slide.scene))]
+        print(f'Number of images = {len(df_img_slide)}')
+        print(f'Rounds:')
+        [print(f'{item}: {sum(df_img_slide.rounds==item)}') for item in sorted(set(df_img_slide.rounds))]
+        print('\n')
+
+def visualize_raw_images(df_img,qcdir,color='c1'):
+    """
+    array raw images to check tissue identity, focus, etc.
+    """
+    for s_sample in sorted(set(df_img.slide)):
+        print(s_sample)
+
+        df_img_slide = df_img[df_img.slide==s_sample]
+        for s_scene in sorted(set(df_img_slide.scene)):
+            print(s_scene)
+            df_dapi = df_img_slide[(df_img_slide.color==color) & (df_img_slide.scene==s_scene)].sort_values(['round_ord','rounds'])
+            fig = mpimage.array_img(df_dapi,s_xlabel='slide',ls_ylabel=['scene','color'],s_title='rounds',tu_array=(2,len(df_dapi)//2+1),tu_fig=(24,10))
+            fig.savefig(f'{qcdir}/RawImages/{s_sample}-Scene-{s_scene}_{color}_all.png')
+
+def registration_python(s_sample,tiffdir,regdir,qcdir):
+    print(f'Registering {s_sample}')
+    preprocess.cmif_mkdir([f'{qcdir}/RegistrationPlots/'])
+    os.chdir(f'{tiffdir}/{s_sample}')
+    df_img = mpimage.parse_org(s_end = "ORG.tif",type='raw')
+    df_img['round_ord'] = [int(re.sub('[^0-9]','', item)) for item in df_img.rounds] 
+    df_img = df_img.sort_values(['round_ord','rounds','color','scene'])
+    for i_scene in sorted(set(df_img.scene)):
+        preprocess.cmif_mkdir([f'{regdir}/{s_sample}-Scene-{i_scene}'])
+        df_dapi = df_img[(df_img.color=='c1') & (df_img.scene==i_scene)]
+        target_file = df_dapi[df_dapi.rounds=='R1'].index[0]
+        target = io.imread(target_file)
+        for moving_file in df_dapi.index:
+            s_round = moving_file.split('_')[0]
+            moving_pts, target_pts, transformer = register.register(target_file,moving_file,b_plot=True)
+            for moving_channel in df_img[(df_img.rounds==s_round) & (df_img.scene==i_scene)].index:
+                moving = io.imread(moving_channel)
+                warped_img, warped_pts = register.apply_transform(moving, target, moving_pts, target_pts, transformer)
+                warped_img = img_as_uint(warped_img)
+                io.imsave(f"{regdir}/{s_sample}-Scene-{i_scene}/Registered-{moving_channel.split(s_sample)[0]}{s_sample}-Scene-{moving_channel.split('-Scene-')[1]}",warped_img)
+
+def run_registration_matlab(d_register, ls_order, tiffdir, regdir, N_colors='5'):
+    """
+    run registration on server with or without cropping
+
+    d_register: dict of sample name -> crop dict; an empty crop dict selects
+        the regular registration, a non-empty one the large (cropped) one
+    ls_order: passed through to the preprocess registration helpers
+    tiffdir: becomes the working directory; wrapper.sh is copied into it
+    regdir: folder in which the per-scene output folders are created
+    N_colors: passed through to the preprocess registration helpers
+
+    after the preprocess helper has run, 'sbatch wrapper.sh' is started for
+    each sample; the process is not waited for and its output is not read
+    """
+    os.chdir(tiffdir)
+    shutil.copyfile(f'{s_src_path}/src/wrapper.sh', './wrapper.sh')
+    for s_sample, d_crop in d_register.items():
+        if len(d_crop) > 0:
+            print(f'Large registration {s_sample}')
+            # one output folder per crop key, zero padded to 3 digits;
+            # keys longer than 2 characters get no folder here
+            for key, value in d_crop.items():
+                if len(str(key)) == 1:
+                    preprocess.cmif_mkdir([f'{regdir}/{s_sample.split("-Scene")[0]}-Scene-00{str(key)}'])
+                elif len(str(key)) == 2:
+                    preprocess.cmif_mkdir([f'{regdir}/{s_sample.split("-Scene")[0]}-Scene-0{str(key)}'])
+            preprocess.large_registration_matlab(N_smpl='10000',N_colors=N_colors,s_rootdir=tiffdir, s_subdirname=regdir,
+             d_crop_regions=d_crop, s_ref_id='./R1_*_c1_ORG.tif', ls_order=ls_order) 
+            # submit the job; MyOut is never read
+            MyOut = subprocess.Popen(['sbatch', 'wrapper.sh'], #the script runs fine
+             stdout=subprocess.PIPE,
+             stderr=subprocess.STDOUT)
+        #regular registration
+        else:
+            print(f'Regular registration {s_sample}')
+            # parse_org is called without a path, after the chdir to tiffdir above
+            df_img = mpimage.parse_org(s_end = "ORG.tif",type='raw')
+            df_img['slide_scene'] = df_img.slide + '-Scene-' + df_img.scene
+            preprocess.cmif_mkdir([(f'{regdir}/{item}') for item in sorted(set(df_img.slide_scene))]) #this will break with diff slides
+            preprocess.registration_matlab(N_smpl='10000',N_colors=N_colors,s_rootdir=tiffdir, s_subdirname=f'{regdir}/',
+             s_ref_id='./R1_*_c1_ORG.tif',ls_order =ls_order)
+            # submit the job; MyOut is never read
+            MyOut = subprocess.Popen(['sbatch', 'wrapper.sh'], #the script runs fine
+             stdout=subprocess.PIPE,
+             stderr=subprocess.STDOUT)
+
+def visualize_reg_images(regdir,qcdir,color='c1',s_sample=''):
+    """
+    array registered images to check tissue identity, focus, etc.
+    """
+    #check registration
+    preprocess.cmif_mkdir([f'{qcdir}/RegisteredImages'])
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find(s_sample) > -1:
+            os.chdir(s_dir)
+            s_sample_name = s_dir.split('-Scene')[0]
+            print(s_sample_name)
+            df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+            ls_scene = sorted(set(df_img.scene))
+            for s_scene in ls_scene:
+                print(s_scene)
+                df_img_scene = df_img[df_img.scene == s_scene]
+                df_img_stain = df_img_scene[df_img_scene.color==color]
+                df_img_sort = df_img_stain.sort_values(['round_ord','rounds'])
+                i_sqrt = math.ceil(math.sqrt(len(df_img_sort)))
+                fig = mpimage.array_img(df_img_sort,s_xlabel='marker',ls_ylabel=['scene','color'],s_title='rounds',tu_array=(2,len(df_img_sort)//2+1),tu_fig=(24,10))
+                #fig = mpimage.array_img(df_img_sort,s_column='color',s_row='rounds',s_label='scene',tu_array=(i_sqrt,i_sqrt),tu_fig=(16,14))
+                fig.savefig(f'{qcdir}/RegisteredImages/{s_scene}_registered_{color}.png')
+            os.chdir('..')
+    return(df_img_sort)
+
+def rename_files(d_rename,dir,b_test=True):
+    """
+    change file names
+    """
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{dir}/{s_dir}'
+        os.chdir(s_path)
+        #s_sample = s_dir.split('-Scene')[0]
+        print(s_dir)
+        df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+        es_wrong= preprocess.check_names(df_img)
+        if b_test:
+            print('This is a test')
+            preprocess.dchange_fname(d_rename,b_test=True)
+        elif b_test==False:
+            print('Changing name - not a test')
+            preprocess.dchange_fname(d_rename,b_test=False)
+        else:
+            pass
+
+def autofluorescence_subtract_dir(regdir,codedir,d_channel,ls_exclude,subdir,d_early={}):
+    '''
+    AF subtract images
+    '''
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        print(s_dir)
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        #preprocess.cmif_mkdir([f'{s_path}/AFSubtracted'])
+        s_sample = s_dir.split('-Scene')[0]
+        df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+        #load exposure times csv
+        df_exp = pd.read_csv(f'{codedir}/{s_sample}_ExposureTimes.csv',index_col=0,header=0)#
+        #AF subtract images
+        df_img_exp = mpimage.add_exposure(df_img,df_exp,type='czi')
+        if len(d_early)>0:
+            df_markers, df_copy = mpimage.subtract_scaled_images(df_img_exp,d_late=d_channel,
+                d_early=d_early, ls_exclude=ls_exclude,subdir=subdir,b_8bit=False)
+        else:
+            df_markers, df_copy = mpimage.subtract_images(df_img_exp,d_channel=d_channel,
+                ls_exclude=ls_exclude,subdir=subdir,b_8bit=False)
+
+    return(df_markers)
+
+def autofluorescence_subtract(s_sample,df_img,codedir,d_channel,ls_exclude,subdir,d_early={}):
+    '''
+    AF subtract images
+    '''
+    df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+    #load exposure times csv
+    df_exp = pd.read_csv(f'{codedir}/{s_sample}_ExposureTimes.csv',index_col=0,header=0)#
+    #AF subtract images
+    df_img_exp = mpimage.add_exposure(df_img,df_exp,type='czi')
+    if len(d_early)>0:
+            df_markers, df_copy = mpimage.subtract_scaled_images(df_img_exp,d_late=d_channel,
+                d_early=d_early, ls_exclude=ls_exclude,subdir=subdir,b_8bit=False)
+    else:
+            df_markers, df_copy = mpimage.subtract_images(df_img_exp,d_channel=d_channel,
+                ls_exclude=ls_exclude,subdir=subdir,b_8bit=False)
+
+    return(df_markers)
+
+def multipage_ome_tiff(d_combos,d_crop,tu_dim,s_dapi,regdir,b_crop=False):
+    '''
+    make custom overlays, either original of AF subtracted, save at 8 bit for size, and thresholding
+    '''
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        print(s_dir)
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.parse_org(s_end = "ORG.tif",s_start='R',type='reg')
+        df_dapi =  df_img[df_img.marker.str.contains(s_dapi.split('_')[0])]
+        df_img_stain = df_img[(~df_img.marker.str.contains('DAPI'))]
+        #check
+        es_test = set()
+        for key, item in d_combos.items():
+            es_test = es_test.union(item)
+        print(set(df_img_stain.marker) - es_test)
+
+        #cropped
+        if b_crop:
+            s_scene = set(d_crop).intersection(set(df_img.scene))
+            d_crop_scene={k: d_crop[k] for k in (sorted(s_scene))}
+            process.custom_crop_overlays(d_combos,d_crop_scene, df_img,s_dapi, tu_dim=tu_dim) #df_dapi,
+        else:
+            process.custom_overlays(d_combos, df_img_stain, df_dapi)
+
+def visualize_multicolor_overlay(s_scene,subdir,qcdir,d_overlay,d_crop,es_bright,high_thresh):
+    s_sample = s_scene.split('-Scene')[0]
+    preprocess.cmif_mkdir([f'{qcdir}/{s_sample}'])
+    if os.path.exists(f'{subdir}/{s_sample}'):
+        s_path = f'{subdir}/{s_sample}'
+    elif os.path.exists(f'{subdir}/{s_scene}'):
+        s_path = f'{subdir}/{s_scene}'
+    os.chdir(s_path)
+    df_img = mpimage.parse_org()
+    df_img['path'] = [f'{s_path}/{item}' for item in df_img.index]
+    df_dapi_round = df_img[(df_img.color=='c1')&(df_img.scene==s_scene) & (df_img.rounds=='R2')]
+    df_scene = df_img[(df_img.color!='c1') & (df_img.scene==s_scene)]
+    for s_round,ls_marker  in d_overlay.items():
+        print(f'Generating multicolor overlay {[item for item in ls_marker]}')
+        df_round = df_scene[df_scene.marker.isin(ls_marker)]
+        high_thresh=0.999
+        d_overlay_round = {s_round:ls_marker}
+        d_result = mpimage.multicolor_png(df_round,df_dapi_round,s_scene=s_scene,d_overlay=d_overlay_round,d_crop=d_crop,es_dim={'nada'},es_bright=es_bright,low_thresh=2000,high_thresh=high_thresh)
+        for key, tu_result in d_result.items():
+            io.imsave(f'{qcdir}/{s_sample}/ColorArray_{s_scene}_{key}_{".".join(tu_result[0])}.png',tu_result[1])
+
+def cropped_ometiff(s_scene,subdir,cropdir,d_crop,d_combos,s_dapi,tu_dim,b_8bit=True):
+    s_sample = s_scene.split('-Scene')[0]
+    if os.path.exists(f'{subdir}/{s_sample}'):
+        os.chdir(f'{subdir}/{s_sample}')
+    elif os.path.exists(f'{subdir}/{s_scene}'):
+        os.chdir(f'{subdir}/{s_scene}')
+    df_img = mpimage.parse_org()
+    d_crop_scene = {s_scene:d_crop[s_scene]}
+    if b_8bit:
+        dd_result = mpimage.overlay_crop(d_combos,d_crop_scene,df_img,s_dapi,tu_dim)
+    else:
+        dd_result = mpimage.overlay_crop(d_combos,d_crop_scene,df_img,s_dapi,tu_dim,b_8bit=False)
+    for s_crop, d_result in dd_result.items():
+        for s_type, (ls_marker, array) in d_result.items():
+            print(f'Generating multi-page ome-tiff {[item for item in ls_marker]}')
+            new_array = array[np.newaxis,np.newaxis,:]
+            s_xml =  ometiff.gen_xml(new_array, ls_marker)
+            with tifffile.TiffWriter(f'{cropdir}/{s_crop}_{s_type}.ome.tif') as tif:
+                tif.save(new_array,  photometric = "minisblack", description=s_xml, metadata = None)
+
+def crop_registered(s_scene,bigdir,regdir,d_crop):
+    '''
+    crop a stack of tiffs to the specified coordinates
+    d_crop: crop to scene:(xmin, y_min, xmax, ymax)
+    '''
+    s_sample = s_scene.split('-Scene')[0]
+    print(s_scene)
+    os.chdir(f'{bigdir}/{s_scene}')
+    df_img = mpimage.parse_org()
+    df_scene = df_img[df_img.scene==s_scene]
+    for s_image in df_scene.index:
+        #print(s_image)
+        a_dapi = io.imread(s_image)
+        for idx, xy_cropcoor in d_crop.items():
+            #crop 
+            a_crop = a_dapi[xy_cropcoor[1]:xy_cropcoor[3],xy_cropcoor[0]:xy_cropcoor[2]]
+            preprocess.cmif_mkdir([f'{regdir}/{s_sample}-Scene-{idx:03}'])
+            io.imsave(f'{regdir}/{s_sample}-Scene-{idx:03}/{s_image.replace(s_scene,f"{s_sample}-Scene-{idx:03}")}',a_crop,check_contrast=False)
+
+def multipage_tiff(d_combos,d_crop,tu_dim,s_dapi,regdir,b_crop=False):
+    '''
+    make custom overlays, either original of AF subtracted, save at 8 bit for size, and thresholding
+    '''
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        print(s_dir)
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.parse_org(s_end = "ORG.tif",s_start='R',type='reg')
+        df_dapi =  df_img[df_img.marker.str.contains(s_dapi.split('_')[0])]
+        df_img_stain = df_img[(~df_img.marker.str.contains('DAPI'))]
+        #check
+        es_test = set()
+        for key, item in d_combos.items():
+            es_test = es_test.union(item)
+        print(set(df_img_stain.marker) - es_test)
+
+        #cropped
+        if b_crop:
+            s_scene = set(d_crop).intersection(set(df_img.scene))
+            d_crop_scene={k: d_crop[k] for k in (sorted(s_scene))}
+            process.custom_crop_overlays(d_combos,d_crop_scene, df_img,s_dapi, tu_dim=tu_dim) #df_dapi,
+        else:
+            process.custom_overlays(d_combos, df_img_stain, df_dapi)
+
+def crop_basins(d_crop,tu_dim,segdir,cropdir,s_type='Cell'):
+    """
+    crop the segmentation basins (cell of nuceli) to same coord as images for veiwing in Napari
+    """
+    cwd = os.getcwd()
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        s_sample = s_scene.split('-Scene-')[0]
+        os.chdir(f'{segdir}/{s_sample}_Segmentation/')
+
+        for s_file in os.listdir():
+            if s_file.find(f'{s_type} Segmentation Basins.tif') > -1: #Nuclei Segmentation Basins.tif #Cell Segmentation Basins.tif
+                if s_file.find(s_scene.split('-Scene-')[1]) > -1:
+                    a_seg = skimage.io.imread(s_file)
+                    a_crop = a_seg[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+                    s_coor = f'x{xy_cropcoor[0]}y{xy_cropcoor[1]}.tif'
+                    #crop file
+                    s_file_new = f'{cropdir}/{s_sample}-{s_file.replace(" - ","_").replace(" ","").replace("Scene","Scene-").replace(".tif",s_coor)}'
+                    print(s_file_new)
+                    skimage.io.imsave(s_file_new,a_crop)
+    os.chdir(cwd)
+
+def load_crop_labels(d_crop,tu_dim,segdir,cropdir,s_find='Nuclei Segmentation Basins'):
+    """
+    crop the segmentation basins (cell of nuceli) to same coord as images for veiwing in Napari
+    s_find: 'exp5_CellSegmentationBasins' or 'Nuclei Segmentation Basins'
+    """
+    cwd = os.getcwd()
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        s_sample = s_scene.split('-Scene-')[0]
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation/')
+
+        for s_file in os.listdir():
+            if s_file.find(s_find) > -1: #Nuclei Segmentation Basins.tif #Cell Segmentation Basins.tif
+                if s_file.find(s_scene.split(s_sample)[1]) > -1:
+                    a_seg = skimage.io.imread(s_file)
+                    a_crop = a_seg[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+                    s_coor = f'x{xy_cropcoor[0]}y{xy_cropcoor[1]}.tif'
+                    #crop file
+                    s_file_new = f'{cropdir}/{s_file.replace(" ","").replace(".tif",s_coor)}'
+                    print(s_file_new)
+                    skimage.io.imsave(s_file_new,a_crop)
+    os.chdir(cwd)
+
+def load_labels(d_crop,segdir,s_find='Nuclei Segmentation Basins'):
+    """
+    load the segmentation basins (cell of nuceli) 
+    s_find: 'exp5_CellSegmentationBasins' or 'Nuclei Segmentation Basins'
+    """
+    d_label={}
+    cwd = os.getcwd()
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        s_sample = s_scene.split('-Scene-')[0]
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation/')
+        for s_file in os.listdir():
+            if s_file.find(s_find) > -1: #Nuclei Segmentation Basins.tif #Cell Segmentation Basins.tif
+                if s_file.find(s_scene.split(s_sample)[1]) > -1:
+                    a_seg = skimage.io.imread(s_file)
+                    d_label.update({s_scene:a_seg})
+    os.chdir(cwd)
+    return(d_label)
+
+def crop_labels(d_crop,d_label,tu_dim,cropdir,s_name='Nuclei Segmentation Basins'):
+    """
+    crop the segmentation basins (cell of nuceli) to same coord as images for veiwing in Napari
+    s_name = 
+    """
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        a_seg = d_label[s_scene]
+        a_crop = a_seg[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+        s_coor = f'x{xy_cropcoor[0]}y{xy_cropcoor[1]}.tif'
+        #crop file
+        s_file_new = f'{cropdir}/{s_name.replace(" ","").replace(".tif",s_coor)}'
+        print(s_file_new)
+        skimage.io.imsave(s_file_new,a_crop)
+
+
+#### OLD: for Guillaume's pipeline ###
+
+def copy_files(dir,dapi_copy, marker_copy,b_test=True):
+    """
+    copy and rename files if needed as dummies
+    """
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{dir}/{s_dir}'
+        os.chdir(s_path)
+        s_sample = s_dir.split('-Scene')[0]
+        df_img = mpimage.parse_org(s_end = "ORG.tif")
+        print(s_dir)
+        if b_test:
+            for key, dapi_item in dapi_copy.items():
+                preprocess.copy_dapis(s_r_old=key,s_r_new=f'-R{dapi_item}_',s_c_old='_c1_',s_c_new='_c2_',s_find='_c1_ORG.tif',b_test=True)
+            i_count=0
+            for idx,(key, item) in enumerate(marker_copy.items()):
+                preprocess.copy_markers(df_img, s_original=key, ls_copy = item,i_last_round= dapi_item + i_count, b_test=True)
+                i_count=i_count + len(item)
+        elif b_test==False:
+            print('Changing name - not a test')
+            for key, dapi_item in dapi_copy.items():
+                preprocess.copy_dapis(s_r_old=key,s_r_new=f'-R{dapi_item}_',s_c_old='_c1_',s_c_new='_c2_',s_find='_c1_ORG.tif',b_test=False)
+            i_count=0
+            for idx,(key, item) in enumerate(marker_copy.items()):
+                preprocess.copy_markers(df_img, s_original=key, ls_copy = item,i_last_round= dapi_item + i_count, b_test=False)
+                i_count=i_count + len(item)
+        else:
+            pass
+
+def segmentation_thresholds(regdir,qcdir, d_segment):
+    """
+    visualize binary mask of segmentaiton threholds
+    """
+    preprocess.cmif_mkdir([f'{qcdir}/Segmentation'])
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+        s_sample = s_dir.split('-Scene')[0]
+        print(s_sample)
+        if  (len(set(df_img.scene))) < 3:
+            d_seg = preprocess.check_seg_markers(df_img,d_segment, i_rows=1, t_figsize=(10,6)) #few scenes
+        elif  (len(set(df_img.scene))) > 8:
+            d_seg = preprocess.check_seg_markers(df_img,d_segment, i_rows=3, t_figsize=(10,6)) #more scenes
+        else:
+            d_seg = preprocess.check_seg_markers(df_img,d_segment, i_rows=2, t_figsize=(10,6)) #more scenes
+        for key, fig in d_seg.items():
+            fig.savefig(f'{qcdir}/Segmentation/{s_dir}_{key}_segmentation.png')
+
+def move_af_img(s_sample, regdir, subdir, dirtype='tma',b_move=False):
+    '''
+    list, and optionally move, the images of all regdir sub folders whose
+    name contains s_sample
+
+    dirtype = 'single' or 'tma' or 'unsub'
+        'single': files come from {regdir}/{folder}/AFSubtracted and go to {subdir}/{folder}
+        'tma':    files come from {regdir}/{folder}/AFSubtracted and go to {subdir}/{s_sample}
+        'unsub':  files come from {regdir}/{folder} and go to {subdir}/{s_sample}
+    b_move: if False the planned moves are only printed
+    the working directory is changed and not restored
+    '''
+    #move
+    os.chdir(regdir)
+    for s_dir in sorted(os.listdir()):
+        if s_dir.find(s_sample)>-1:
+            # create the destination folder
+            if dirtype =='single':
+                preprocess.cmif_mkdir([f'{subdir}/{s_dir}'])
+            elif dirtype == 'tma':
+                preprocess.cmif_mkdir([f'{subdir}/{s_sample}'])
+            elif dirtype == 'unsub':
+                preprocess.cmif_mkdir([f'{subdir}/{s_sample}'])
+            # enter the source folder
+            if dirtype != 'unsub':
+                print(f'{regdir}/{s_dir}/AFSubtracted')
+                os.chdir(f'{regdir}/{s_dir}/AFSubtracted')
+            else:
+                os.chdir(f'{regdir}/{s_dir}')
+            for s_file in sorted(os.listdir()):
+                    # NOTE: movedir is only set for the three known dirtype values
+                    if dirtype =='single':
+                        movedir = f'{subdir}/{s_dir}/{s_file}'
+                        print(f'{regdir}/{s_dir}/AFSubtracted/{s_file} moved to {movedir}')
+                    elif dirtype == 'tma':
+                        movedir = f'{subdir}/{s_sample}/{s_file}'
+                        print(f'{regdir}/{s_dir}/AFSubtracted/{s_file} moved to {movedir}')
+                    elif dirtype == 'unsub':
+                        movedir = f'{subdir}/{s_sample}/{s_file}'
+                        print(f'{regdir}/{s_dir}/{s_file} moved to {movedir}')
+                    if b_move:
+                        if dirtype != 'unsub':
+                            shutil.move(f'{regdir}/{s_dir}/AFSubtracted/{s_file}', f'{movedir}')
+                        else:
+                            shutil.move(f'{regdir}/{s_dir}/{s_file}', f'{movedir}')
+
+def extract_dataframe(s_sample, segdir,qcdir,i_rows=1):
+    '''
+    get mean intensity, centroid dataframes
+    '''
+    preprocess.cmif_mkdir([f'{qcdir}/Segmentation'])
+    #get data
+    os.chdir(segdir)
+    dd_run = getdata.get_df(s_folder_regex=f"^{s_sample}.*_Features$",es_value_label = {"MeanIntensity","CentroidY","CentroidX"})#
+    os.chdir(f'{s_sample}_Segmentation')
+    d_reg = process.check_seg(s_sample=s_sample,ls_find=['Cell Segmentation Full Color'], i_rows=i_rows, t_figsize=(8,8))#
+    for key, item in d_reg.items():
+        item.savefig(f'{qcdir}/Segmentation/FullColor_{key}.png')
+
+def metadata_table(regdir,segdir):
+    """
+    output channel/marker mapping
+    """
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+        if len(set(df_img.scene)) > 1:
+            df_img = df_img[df_img.scene==sorted(set(df_img.scene))[1]]
+            s_sample = s_dir
+        else:
+            s_sample = s_dir.split('-Scene')[0]
+        print(s_sample)
+        df_marker = df_img[df_img.color!='c1']
+        df_marker = df_marker.sort_values(['rounds','color'])
+        df_dapi = pd.DataFrame(index = [df_marker.marker.tolist()],columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+        df_dapi['rounds'] = df_marker.loc[:,['rounds']].values
+        df_dapi['colors'] = df_marker.loc[:,['color']].values
+        df_dapi['minimum'] = 1003
+        df_dapi['maximum'] = 65535
+        df_dapi['exposure'] = 100
+        df_dapi['refexp'] = 100
+        df_dapi['location'] = 'All'
+        df_dapi.to_csv(f'{segdir}/metadata_{s_sample}_RoundsCyclesTable.csv',header=True)
+
+def segmentation_inputs(regdir,segdir, d_segment,tma_bool=False,b_start=False,i_counter=0,b_java=False):
+    """
+    make inputs for guillaumes segmentation
+    """
+
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.parse_org(s_end = "ORG.tif",type='reg')
+        if len(set(df_img.scene)) > 1:
+            df_img = df_img[df_img.scene==sorted(set(df_img.scene))[1]]
+            s_sample = s_dir
+        else:
+            s_sample = s_dir.split('-Scene')[0]
+        print(s_sample)
+        df_marker = df_img[df_img.color!='c1']
+        df_marker = df_marker.sort_values(['rounds','color'])
+        df_dapi = pd.DataFrame(index = [df_marker.marker.tolist()],columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+        df_dapi['rounds'] = df_marker.loc[:,['rounds']].values
+        df_dapi['colors'] = df_marker.loc[:,['color']].values
+        df_dapi['minimum'] = 1003
+        df_dapi['maximum'] = 65535
+        df_dapi['exposure'] = 100
+        df_dapi['refexp'] = 100
+        df_dapi['location'] = 'All'
+        for s_key,i_item in d_segment.items():
+                df_dapi.loc[s_key,'minimum'] = i_item
+        df_dapi.to_csv(f'{segdir}/metadata_{s_sample}_RoundsCyclesTable.csv',header=True)
+        #create cluster.java file
+        if b_java:
+            df_dapi.to_csv('RoundsCyclesTable.txt',sep=' ',header=False)
+            preprocess.cluster_java(s_dir=f'JE{idx + i_counter}',s_sample=s_sample,imagedir=f'{s_path}',segmentdir=segdir,type='exacloud',b_segment=True,b_TMA=tma_bool)
+        if b_start:
+            os.chdir(f'{s_work_path}/exacloud/JE{idx}') #exacloud
+            #shutil.copyfile(f'{s_src_path}/src/javawrapper.sh', './javawrapper.sh')
+            print(f'JE{idx + i_counter}')
+            subprocess.run(["make"])
+            subprocess.run(["make", "slurm"])
+
+def prepare_dataframe(s_sample,ls_dapi,dapi_thresh,d_channel,ls_exclude,segdir,codedir,s_af='none', b_afsub=False):
+    '''
+    filter data by last dapi, standard location, subtract AF, output threshold csv
+    ls_dapi[0] becomes s_dapi
+
+    segdir becomes the working directory; the files written there are
+        features_{s_sample}_CentroidXY.csv
+        features_{s_sample}_FilteredMeanIntensity_{ls_dapi[0]}{dapi_thresh}.csv
+    NOTE: background quantiles, exposure metadata, AF subtraction and the
+    threshold csv are inside the disabled (string) block below, so
+    ls_exclude, codedir, s_af and b_afsub currently have no effect.
+    '''
+
+    os.chdir(f'{segdir}')
+    #load data
+    df_mi = process.load_mi(s_sample)
+    df_xy = process.load_xy(s_sample)
+    #drop extra centroid columns,add scene column
+    df_xy = df_xy.loc[:,['DAPI_X','DAPI_Y']]
+    df_xy = process.add_scene(df_xy)
+    df_xy.to_csv(f'features_{s_sample}_CentroidXY.csv')
+    #filter by last DAPI
+    df_dapi_mi = process.filter_dapi(df_mi,df_xy,ls_dapi[0],dapi_thresh,b_images=True)
+
+    #filter mean intensity by biomarker location in metadata
+    df_filter_mi, es_standard = process.filter_standard(df_dapi_mi,d_channel,s_dapi=ls_dapi[0])
+
+    df_filter_mi.to_csv(f'features_{s_sample}_FilteredMeanIntensity_{ls_dapi[0]}{dapi_thresh}.csv')
+    #background quantiles
+    # everything inside the following string literal is disabled code
+    '''
+    df_bg = process.filter_background(df_mi, es_standard)
+    df_bg.to_csv(f'features_{s_sample}_BackgroundQuantiles.csv')
+    df_bg = process.filter_background(df_dapi_mi, es_standard)
+    df_bg.to_csv(f'features_{s_sample}_FilteredBackgroundQuantiles.csv')
+
+    df_t = pd.read_csv(f'metadata_{s_sample}_RoundsCyclesTable.csv',index_col=0,header=0)
+    df_exp = pd.read_csv(f'{codedir}/{s_sample}_ExposureTimes.csv',index_col=0,header=0)
+    df_tt = process.add_exposure_roundscyles(df_t, df_exp,es_standard, ls_dapi = ls_dapi)
+    df_tt.to_csv(f'metadata_{s_sample}_RoundsCyclesTable_ExposureTimes.csv')
+    if b_afsub:
+        #load metadata
+        df_t = pd.read_csv(f'metadata_{s_sample}_RoundsCyclesTable_ExposureTimes.csv',index_col=0,header=0)
+        #normalize by exposure time, and save to csv
+        lb_columns = [len(set([item]).intersection(set(df_t.index)))>0 for item in [item.split('_')[0] for item in df_filter_mi.columns]]
+        df_filter_mi = df_filter_mi.loc[:,lb_columns]
+        df_norm = process.exposure_norm(df_filter_mi,df_t)
+        df_norm.to_csv(f'features_{s_sample}_ExpNormalizedMeanIntensity_{ls_dapi[0]}{dapi_thresh}.csv')
+        #subtract AF channels in data
+        df_sub,ls_sub,ls_record = process.af_subtract(df_norm,df_t,d_channel,ls_exclude)
+        df_out = process.output_subtract(df_sub,df_t)
+        df_sub.to_csv(f'features_{s_sample}_AFSubtractedMeanIntensityNegative{s_af}_{ls_dapi[0]}{dapi_thresh}.csv')
+        df_out.to_csv(f'features_{s_sample}_AFSubtractedMeanIntensity{s_af}_{ls_dapi[0]}{dapi_thresh}.csv')
+        f = open(f"{s_sample}_AFsubtractionData_{s_af}.txt", "w")
+        f.writelines(ls_record)
+        f.close()
+    else:
+        df_out = df_filter_mi
+    #output thresholding csv
+    #df_out = process.add_scene(df_out) #df_out
+    #df_thresh = process.make_thresh_df(df_out,ls_drop=None)
+    #df_thresh.to_csv(f'thresh_XX_{s_sample}.csv')
+    '''
+    print('Done')
+
+def fetch_celllabel(s_sampleset, s_slide, s_ipath, s_opath = './', es_scene = None, es_filename_endswith ={'Cell Segmentation Basins.tif', 'Nuclei Segmentation Basins.tif'}, s_sep = ' - ', b_test=True):
+    '''
+    input:
+        s_sampleset: sample set name. e.g. jptma
+        s_slide: slide name. e.g. jp-tma1-1
+        es_scene: set of scenes of interest. The scenes have to be written in the same way as in the basin file name.
+            if None, all scenes are if interest. default is None.
+        s_ipath: absolute or relative path where the basin files can be found.
+        s_opath: path to where the fetched basin files should be outputed.
+            a folder, based on the s_sampleset, will be generated (if it not already exist), where the basin files will be placed.
+        es_filename_endswith: set of patters that defind the endings of the files of interest.
+        s_sep: separator to separate slide and scenes in the file name.
+        b_test: test flag. if True no files will be copied, it is just a simulation mode.
+
+    output:
+        folder with basin flies. placed at {s_opath}{s_sampleset}_segmentation_basin/
+
+    description:
+      fetches basin (cell label) files from Guillaume's segmentation pipeline
+      and copies them into a folder at s_opath, named according to s_sampleset name.
+    '''
+    # generate output directory
+    os.makedirs('{}{}_segmentation_basin/'.format(s_opath, s_sampleset), exist_ok=True)
+    # processing
+    if (es_scene is None):
+        i_total = 'all'
+    else:
+        i_total = len(es_scene) * len(es_filename_endswith)
+        es_sanity_scene = copy.deepcopy(es_scene)
+    i = 0
+    for s_file in sorted(os.listdir(s_ipath)):
+        # check for file of interest
+        b_flag = False
+        for s_filename_endswith in es_filename_endswith:
+            if (s_file.endswith(s_filename_endswith)):
+                if (es_scene is None):
+                    b_flag = True
+                    break
+                else:
+                    for s_scene in es_scene:
+                        if (s_file.startswith(s_scene + s_sep)):
+                            es_sanity_scene.discard(s_scene)
+                            b_flag = True
+                            break
+                break
+        # copy file
+        if (b_flag):
+            i += 1
+            print('copy {}/{}: {}{}{} ...'.format(i, i_total, s_slide, s_sep, s_file))
+            if not (b_test):
+                shutil.copyfile(src='{}{}'.format(s_ipath, s_file), dst='{}{}_segmentation_basin/{}{}{}'.format(s_opath, s_sampleset, s_slide, s_sep, s_file))
+    # sanity check
+    if not (es_scene is None) and (i != i_total):
+        sys.exit('Error: no file found for es_scene specified scene {}'.format(sorted(es_sanity_scene)))
\ No newline at end of file
diff --git a/mplex_image/codex.py b/mplex_image/codex.py
new file mode 100755
index 0000000..a67c58a
--- /dev/null
+++ b/mplex_image/codex.py
@@ -0,0 +1,452 @@
+# wrapper functions for codex image processing
+
+#from mplex_image import preprocess, mpimage, process, 
+from mplex_image import features 
+import os
+import pandas as pd
+import math
+import skimage
+from skimage import io, filters
+import re
+import numpy as np
+
+def parse_img(s_end = ".tif",s_start='reg'):
+    """
+    This function will parse images following akoya stiched naming convention
+    """
+    s_path = os.getcwd()
+    ls_file = []
+    for file in os.listdir():
+        if file.endswith(s_end):
+            if file.find(s_start)==0:
+                ls_file = ls_file + [file]
+    df_img = pd.DataFrame(index=ls_file)
+    df_img['rounds'] = [item.split('_')[1].split('cyc')[1] for item in df_img.index]
+    df_img['color'] = [item.split('_')[3] for item in df_img.index]
+    df_img['slide'] = [item.split('_')[0] for item in df_img.index]
+    df_img['marker'] = [item.split('_')[-1].split('.')[0] for item in df_img.index]
+    df_img['marker_string'] = [item.split('_')[-1].split('.')[0] for item in df_img.index]
+    df_img['path'] = [f"{s_path}/{item}" for item in df_img.index]
+    return(df_img) 
+
+def load_li(ls_sample):
+    '''
+    load threshold on the segmentation marker images acquired during feature extraction
+    '''
+    df_img_all =pd.DataFrame()
+    for s_sample in ls_sample:
+        df_img = pd.read_csv(f'thresh_{s_sample}_ThresholdLi.csv', index_col=0)
+        df_img['rounds'] = [item.split('_')[1].split('cyc')[1] for item in df_img.index]
+        df_img['color'] = [item.split('_')[3] for item in df_img.index]
+        df_img['slide'] = s_sample
+        df_img['scene'] = [item.split('_')[0].split('reg')[1] for item in df_img.index]
+        df_img['marker'] = [item.split('_')[-1].split('.')[0] for item in df_img.index]  #parse file name for biomarker
+        df_img['slide_scene'] = df_img.slide + '_scene' + df_img.scene
+        df_img_all = df_img_all.append(df_img)
+    return(df_img_all)
+
+def underscore_to_dash(df_mi_full,df_img_all):
+    '''
+    the underscore in sample names will break downstream code; change to dash
+    '''
+    #naming underscore to dash
+    df_mi_full['slide'] = [item.split('_scene')[0].replace('_','-') for item in df_mi_full.index]
+    df_mi_full.index = [f"_scene{item.split('_scene')[1]}" for item in df_mi_full.index]
+    df_mi_full.index = df_mi_full.slide + df_mi_full.index
+    df_mi_full['scene'] = [item.split('_')[1] for item in df_mi_full.index]
+    df_mi_full['slide_scene'] = df_mi_full.slide + '_' + df_mi_full.scene
+    #df_img renameing
+    df_img_all['slide'] = [item.replace('_','-') for item in df_img_all.slide]
+    df_img_all['slide_scene'] = df_img_all.slide + '_scene' + df_img_all.scene
+    return(df_mi_full,df_img_all)
+
+def extract_cellpose_features(s_sample, segdir, subdir, ls_seg_markers, nuc_diam, cell_diam,s_scene='reg001'):
+    '''
+    load the segmentation results, the input images, and the channels images
+    extract mean intensity from each image, and centroid, area and eccentricity for 
+    '''
+
+    df_sample = pd.DataFrame()
+    df_thresh = pd.DataFrame()
+    if os.path.exists(f'{segdir}/{s_scene}Cellpose_Segmentation'):
+        os.chdir(f'{segdir}/{s_scene}Cellpose_Segmentation')
+    else:
+        os.chdir(f'{segdir}')
+    ls_scene = []
+    d_match = {}
+    for s_file in os.listdir():
+        if s_file.find(f'{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split('_')[0])
+            d_match.update({s_file.split('_')[0]:s_file})
+        elif s_file.find(f'{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split('_')[0])
+            d_match.update({s_file.split('_')[0]:s_file})
+    for s_scene in ['reg001']:  #ls_scene: #one scene
+        print(f'processing {s_scene}')
+        for s_file in os.listdir():
+            if s_file.find(s_scene) > -1:
+                if s_file.find("DAPI.png") > -1:
+                    s_dapi = s_file
+        dapi = io.imread(s_dapi)
+        print(f'loading {s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        labels = io.imread(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        print(f'loading {d_match[s_scene]}')
+        cell_labels = io.imread(d_match[s_scene])
+        #nuclear features
+        df_feat = features.extract_feat(labels,dapi, properties=(['mean_intensity']))
+        df_feat.columns = [f'{item}_segmented-nuclei' for item in df_feat.columns]
+        df_feat.index = [f'{s_sample}_scene{s_scene.split("reg")[1]}_cell{item}' for item in df_feat.index]
+
+        #get subcellular regions
+        cyto = features.label_difference(labels,cell_labels)
+        d_loc_nuc = features.subcellular_regions(labels, distance_short=2, distance_long=4)
+        d_loc_cell = features.subcellular_regions(cell_labels, distance_short=2, distance_long=4)
+        d_loc = {'nuclei':labels,'cell':cell_labels,'cytoplasm':cyto,
+         'nucmem':d_loc_nuc['membrane'][0],'cellmem':d_loc_cell['membrane'][0],
+         'perinuc4':d_loc_nuc['ring'][1],'exp4':d_loc_nuc['grown'][1],
+         'nucadj2':d_loc_nuc['straddle'][0],'celladj2':d_loc_cell['straddle'][0]}
+        #subdir organized by slide or scene
+        if os.path.exists(f'{subdir}/{s_sample}'):
+            os.chdir(f'{subdir}/{s_sample}')
+        elif os.path.exists(f'{subdir}/{s_scene}'):
+            os.chdir(f'{subdir}/{s_scene}')
+        else:
+            os.chdir(f'{subdir}')
+        df_img = parse_img()
+        df_img['round_int'] = [int(re.sub('[^0-9]','', item)) for item in df_img.rounds] 
+        df_img = df_img[df_img.round_int < 90]
+        df_img = df_img.sort_values('round_int')
+        df_scene = df_img# one scene [df_img.scene==s_scene.split("-Scene-")[1].split("_")[0]]
+
+        #load each image
+        for s_index in df_scene.index:
+                intensity_image = io.imread(s_index)
+                df_thresh.loc[s_index,'threshold_li'] =  filters.threshold_li(intensity_image)
+                if intensity_image.mean() > 0:
+                    df_thresh.loc[s_index,'threshold_otsu'] = filters.threshold_otsu(intensity_image)
+                    df_thresh.loc[s_index,'threshold_triangle'] = filters.threshold_triangle(intensity_image)
+                s_marker = df_scene.loc[s_index,'marker']
+                print(f'extracting features {s_marker}')
+                #if s_marker == 'DAPI':
+                #    s_marker = s_marker + f'{df_scene.loc[s_index,"rounds"].split("cyc")[1]}'
+                for s_loc, a_loc in d_loc.items():
+                    if s_loc == 'nuclei':
+                        df_marker_loc = features.extract_feat(a_loc,intensity_image, properties=(['mean_intensity','centroid','area','eccentricity']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_centroid-0',f'{s_marker}_{s_loc}_centroid-1',f'{s_marker}_{s_loc}_area',f'{s_marker}_{s_loc}_eccentricity']
+                    elif s_loc == 'cell':
+                        df_marker_loc = features.extract_feat(a_loc,intensity_image, properties=(['mean_intensity','euler_number','area','eccentricity']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_euler',f'{s_marker}_{s_loc}_area',f'{s_marker}_{s_loc}_eccentricity']
+                    else:
+                        df_marker_loc = features.extract_feat(a_loc,intensity_image, properties=(['mean_intensity']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}']
+
+                    #drop zero from array, set array ids as index
+                    df_marker_loc.index = sorted(np.unique(a_loc)[1::])
+                    df_marker_loc.index = [f'{s_sample}_scene{s_scene.split("reg")[1]}_cell{item}' for item in df_marker_loc.index]
+                    df_feat = df_feat.merge(df_marker_loc, left_index=True,right_index=True,how='left',suffixes=('',f'{s_marker}_{s_loc}'))
+        df_sample = df_sample.append(df_feat)
+    return(df_sample, df_thresh)
+
+def convert_tif(regdir,b_mkdir=True):
+    '''
+    convert codex tif to standard tif
+    '''
+    cwd = os.getcwd()
+    os.chdir(regdir)
+    for s_dir in sorted(os.listdir()):
+        if s_dir.find('reg')== 0:
+            os.chdir(s_dir)
+            for s_file in sorted(os.listdir()):
+                if s_file.find('.tif')>-1:
+                    #s_round = s_file.split("Cycle(")[1].split(").ome.tif")[0]
+                    #print(f'stain {s_round}')
+                    #s_dir_new = s_dir.split('_')[2] + '-Scene-0' + s_dir.split('F-')[1]
+                    #s_tissue_dir = s_dir.split('_F-')[0]
+                    if b_mkdir:
+                        preprocess.cmif_mkdir([f'{regdir}/converted_{s_dir}'])
+                    a_dapi = skimage.io.imread(s_file)
+                    with skimage.external.tifffile.TiffWriter(f'{regdir}/converted_{s_dir}/{s_file}') as tif:
+                        tif.save(a_dapi)
+            os.chdir('..')
+    os.chdir(cwd)
+
+def visualize_reg_images(s_sample,regdir,qcdir,color='ch001'):
+    """
+    array registered images to check tissue identity, focus, etc.
+    """
+    #check registration
+    preprocess.cmif_mkdir([f'{qcdir}/RegisteredImages'])
+    cwd = os.getcwd()
+    os.chdir(regdir)
+    #for idx, s_dir in enumerate(sorted(os.listdir())):
+    #    os.chdir(s_dir)
+    #    s_sample = s_dir.split('-Scene')[0]
+    #    print(s_sample)
+    df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='reg',s_split='_')
+    df_img.rename({'data':'scene'},axis=1,inplace=True)
+    df_img['slide'] = s_sample 
+    df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+    df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+    df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+    ls_scene = sorted(set(df_img.scene))
+    for s_scene in ls_scene:
+            print(s_scene)
+            df_img_scene = df_img[df_img.scene == s_scene]
+            df_img_stain = df_img_scene[df_img_scene.color==color]
+            df_img_sort = df_img_stain.sort_values(['rounds'])
+            i_sqrt = math.ceil(math.sqrt(len(df_img_sort)))
+            fig = mpimage.array_img(df_img_sort,s_column='color',s_row='rounds',s_label='marker',tu_array=(i_sqrt,i_sqrt),tu_fig=(16,14))
+            fig.savefig(f'{qcdir}/RegisteredImages/{s_scene}_registered_{color}.png')
+    os.chdir(cwd)
+    return(df_img_sort)
+
+def rename_files(d_rename,dir,b_test=True):
+    """
+    change file names
+    """
+    cwd = os.getcwd()
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find('converted') == 0:
+            s_path = f'{dir}/{s_dir}'
+            os.chdir(s_path)
+            print(s_dir)
+            df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='reg',s_split='_')
+            df_img.rename({'data':'scene'},axis=1,inplace=True)
+            df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+            df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+            df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+            if b_test:
+                print('This is a test')
+                preprocess.dchange_fname(d_rename,b_test=True)
+            elif b_test==False:
+                print('Changing name - not a test')
+                preprocess.dchange_fname(d_rename,b_test=False)
+        else:
+                pass
+
+def rename_fileorder(s_sample, dir, b_test=True):
+    """
+    change file names
+    """
+    cwd = os.getcwd()
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find('converted') == 0:
+            s_path = f'{dir}/{s_dir}'
+            os.chdir(s_path)
+            print(s_dir)
+            df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='Scene',s_split='_')
+            df_img.rename({'data':'scene'},axis=1,inplace=True)
+            df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+            df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+            df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+            for s_index in df_img.index:
+                s_round = df_img.loc[s_index,'rounds']
+                s_scene= f"{s_sample}-{df_img.loc[s_index,'scene']}"
+                s_marker = df_img.loc[s_index,'marker']
+                s_color = df_img.loc[s_index,'color']
+                s_index_rename = f'{s_round}_{s_scene}_{s_marker}_{s_color}_ORG.tif'
+                d_rename = {s_index:s_index_rename}
+                if b_test:
+                    print('This is a test')
+                    preprocess.dchange_fname(d_rename,b_test=True)
+                elif b_test==False:
+                    print('Changing name - not a test')
+                    preprocess.dchange_fname(d_rename,b_test=False)
+            else:
+                pass
+
+def copy_files(dir,dapi_copy, marker_copy,testbool=True,type='codex'):
+    """
+    copy and rename files if needed as dummies
+    need to edit
+    """
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find('converted') == 0:
+            s_path = f'{dir}/{s_dir}'
+            os.chdir(s_path)
+            #s_sample = s_dir.split('-Scene')[0]
+            df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='R0',s_split='_')
+            df_img.rename({'data':'rounds'},axis=1,inplace=True)
+            df_img['scene'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+            df_img['color'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+            df_img['marker'] = [item[2].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+            print(s_dir)
+            for key, dapi_item in dapi_copy.items():
+                df_dapi = df_img[(df_img.rounds== key.split('_')[0]) & (df_img.color=='c1')]
+                s_dapi = df_dapi.loc[:,'marker'][0]
+                preprocess.copy_dapis(s_r_old=key,s_r_new=f'R{dapi_item}_',s_c_old='_c1_',
+                 s_c_new='_c2_',s_find=f'_{s_dapi}_c1_ORG.tif',b_test=testbool,type=type)
+            i_count=0
+            for idx,(key, item) in enumerate(marker_copy.items()):
+                preprocess.copy_markers(df_img, s_original=key, ls_copy = item,
+                 i_last_round= dapi_item + i_count, b_test=testbool,type=type)
+                i_count=i_count + len(item)
+    return(df_img)
+
+def segmentation_thresholds(regdir,qcdir, d_segment):
+    """
+    visualize binary mask of segmentaiton threholds
+    need to edit
+    """
+    preprocess.cmif_mkdir([f'{qcdir}/Segmentation'])
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find('converted') == 0:
+            s_path = f'{regdir}/{s_dir}'
+            os.chdir(s_path)
+            df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='R',s_split='_')
+            df_img.rename({'data':'rounds'},axis=1,inplace=True)
+            df_img['scene'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+            df_img['color'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+            df_img['marker'] = [item[2].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+            s_sample = s_dir
+            print(s_sample)
+            d_seg = preprocess.check_seg_markers(df_img,d_segment, i_rows=1, t_figsize=(6,6)) #few scenes
+            for key, fig in d_seg.items():
+                fig.savefig(f'{qcdir}/Segmentation/{s_dir}_{key}_segmentation.png')
+    return(df_img)
+
+def parse_converted(dir):
+    '''
+    parse codex filenames (coverted)
+    '''
+    cwd = os.getcwd()
+    os.chdir(dir)
+    df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='R',s_split='_')
+    df_img.rename({'data':'rounds'},axis=1,inplace=True)
+    df_img['scene'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+    df_img['color'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+    df_img['marker'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+    os.chdir(cwd)
+    return(df_img)
+
+def segmentation_inputs(s_sample,regdir,segdir,d_segment,b_start=False):
+    """
+    make inputs for guillaumes segmentation
+
+    For every folder in regdir whose name starts with 'convert', a rounds
+    cycles table is built from the R*.tif filenames (all images except
+    color c1) and written into that folder twice:
+    RoundsCyclesTable.txt (space separated, no header) and
+    metadata_{s_sample}_RoundsCyclesTable.csv (with header).
+    Then preprocess.cluster_java is called for the folder.
+
+    input:
+        s_sample: sample name, used in the metadata filename and passed to cluster_java.
+        regdir: folder that contains the convert* folders.
+        segdir: segmentation output folder, passed to cluster_java.
+        d_segment: dictionary of marker to value; overrides the 'minimum' column for that marker.
+        b_start: if True, change into a hard coded exacloud job folder and run make_sh.
+
+    note:
+        the working directory is changed and not restored.
+        NOTE(review): preprocess and mpimage are not imported in this module as shown.
+    """
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find('convert')== 0:
+            s_path = f'{regdir}/{s_dir}'
+            os.chdir(s_path)
+            df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='R',s_split='_')
+            df_img.rename({'data':'rounds'},axis=1,inplace=True)
+            #df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+            df_img['color'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+            df_img['marker'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+            #s_sample = s_dir
+            #s_sample = s_dir.split('-Scene')[0]
+            print(s_sample)
+            # every channel except c1 gets a row in the table
+            df_marker = df_img[df_img.color!='c1']
+            df_marker = df_marker.sort_values(['rounds','color'])
+            # NOTE(review): the index is given as a list that wraps the marker list - confirm this is intended
+            df_dapi = pd.DataFrame(index = [df_marker.marker.tolist()],columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+            df_dapi['rounds'] = df_marker.loc[:,['rounds']].values
+            df_dapi['colors'] = df_marker.loc[:,['color']].values
+            # same default values for every marker
+            df_dapi['minimum'] = 1003
+            df_dapi['maximum'] = 65535
+            df_dapi['exposure'] = 100
+            df_dapi['refexp'] = 100
+            df_dapi['location'] = 'All'
+            # marker specific minimum taken from d_segment
+            for s_key,i_item in d_segment.items():
+                df_dapi.loc[s_key,'minimum'] = i_item
+            # both files are written into the current convert* folder
+            df_dapi.to_csv('RoundsCyclesTable.txt',sep=' ',header=False)
+            df_dapi.to_csv(f'metadata_{s_sample}_RoundsCyclesTable.csv',header=True)
+            #create cluster.java file
+            preprocess.cluster_java(s_dir=f'JE{idx}',s_sample=s_sample,imagedir=f'{s_path}',segmentdir=segdir,type='exacloud',b_segment=True,b_TMA=False)
+            if b_start:
+                # hard coded, site specific job folder; JE{idx} matches the s_dir given to cluster_java above
+                os.chdir(f'/home/groups/graylab_share/Chin_Lab/ChinData/Work/engje/exacloud/JE{idx}') #exacloud
+                print(f'JE{idx}')
+                os.system('make_sh')
+
+def prepare_dataframe(s_sample,s_dapi,dapi_thresh,d_channel,ls_exclude,segdir,b_afsub=False):
+    '''
+    filter data by last dapi, standard location, subtract AF, output treshold csv
+    '''
+
+    os.chdir(f'{segdir}')
+    #load data
+    df_mi = process.load_mi(s_sample)
+    df_xy = process.load_xy(s_sample)
+    #drop extra centroid columns,add scene column
+    df_xy = df_xy.loc[:,['DAPI_X','DAPI_Y']]
+    df_xy = process.add_scene(df_xy)
+    df_xy.to_csv(f'features_{s_sample}_CentroidXY.csv')
+    #filter by last DAPI
+    df_dapi_mi = process.filter_dapi(df_mi,df_xy,s_dapi,dapi_thresh,b_images=True)
+    df_t = process.load_meta(s_sample, s_path='./',type='LocationCsv')
+    #filter mean intensity by biomarker location in metadata
+    df_filter_mi = process.filter_loc(df_dapi_mi,df_t)
+    df_filter_mi.to_csv(f'features_{s_sample}_FilteredMeanIntensity_{s_dapi}{dapi_thresh}.csv')
+    if b_afsub:
+        #load metadata
+        df_t = pd.read_csv(f'metadata_{s_sample}_RoundsCyclesTableExposure.csv',index_col=0,header=0)
+        #normalize by exposure time, and save to csv
+        lb_columns = [len(set([item]).intersection(set(df_t.index)))>0 for item in [item.split('_')[0] for item in df_filter_mi.columns]]
+        df_filter_mi = df_filter_mi.loc[:,lb_columns]
+        df_norm = process.exposure_norm(df_filter_mi,df_t)     
+        df_norm.to_csv(f'features_{s_sample}_ExpNormalizedMeanIntensity_{s_dapi}{dapi_thresh}.csv')
+        #subtract AF channels in data
+        df_sub,ls_sub,ls_record = process.af_subtract(df_norm,df_t,d_channel,ls_exclude)
+        df_out = process.output_subtract(df_sub,df_t)
+        df_out.to_csv(f'features_{s_sample}_AFSubtractedMeanIntensity_{s_dapi}{dapi_thresh}.csv')
+        f = open(f"{s_sample}_AFsubtractionData.txt", "w")
+        f.writelines(ls_record)
+        f.close()
+    else:
+        df_out = df_filter_mi
+    #output thresholding csv
+    df_out = process.add_scene(df_out) #df_out
+    df_thresh = process.make_thresh_df(df_out,ls_drop=None)
+    df_thresh.to_csv(f'thresh_XX_{s_sample}.csv')
+
+def multipage_tiff(d_combos,s_dapi,regdir):
+    '''
+    make custom overlays, either original of AF subtracted, save at 8 bit for size, and thresholding
+    '''
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        if s_dir.find('convert')== 0:
+            s_path = f'{regdir}/{s_dir}'
+            os.chdir(s_path)
+            df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='R',s_split='_')
+            df_img.rename({'data':'rounds'},axis=1,inplace=True)
+            df_img['color'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+            df_img['marker'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+            df_img['scene'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+            df_img['imagetype'] = [item[4].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+            df_dapi =  df_img[df_img.marker.str.contains(s_dapi.split('_')[0])]
+            df_img_stain = df_img[(~df_img.marker.str.contains('DAPI'))]
+            #check
+            es_test = set()
+            for key, item in d_combos.items():
+                es_test = es_test.union(item)
+            print(set(df_img_stain.marker) - es_test)
+            process.custom_overlays(d_combos, df_img_stain, df_dapi)
+        else:
+            continue
+        
+def load_crop_labels(d_crop,tu_dim,segdir,cropdir,s_find='Nuclei Segmentation Basins'):
+    """
+    crop the segmentation basins (cell of nuceli) to same coord as images for veiwing in Napari
+    s_find: 'exp5_CellSegmentationBasins' or 'Nuclei Segmentation Basins'
+    """
+    cwd = os.getcwd()
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        s_sample = s_scene.split('-Scene-')[0]
+        os.chdir(f'{segdir}')
+
+        for s_file in os.listdir():
+            if s_file.find(s_find) > -1: #Nuclei Segmentation Basins.tif #Cell Segmentation Basins.tif
+                if s_file.find(s_scene.split(s_sample)[1]) > -1:
+                    a_seg = skimage.io.imread(s_file)
+                    a_crop = a_seg[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+                    s_coor = f'x{xy_cropcoor[0]}y{xy_cropcoor[1]}.tif'
+                    #crop file
+                    s_file_new = f'{cropdir}/{s_sample}_{s_file.replace(" ","").replace(".tif",s_coor)}'
+                    print(s_file_new)
+                    skimage.io.imsave(s_file_new,a_crop)
+    os.chdir(cwd)
diff --git a/mplex_image/features.py b/mplex_image/features.py
new file mode 100755
index 0000000..7812462
--- /dev/null
+++ b/mplex_image/features.py
@@ -0,0 +1,603 @@
+####
+# title: features.py
+# language: Python3.7
+# date: 2020-06-00
+# license: GPL>=v3
+# author: Jenny
+# description:
+#   python3 script for single cell feature extraction
+####
+
+#libraries
+import os
+import sys
+import numpy as np
+import pandas as pd
+import shutil
+import skimage
+import scipy
+from scipy import stats
+from scipy import ndimage as ndi
+from skimage import measure, segmentation, morphology
+from skimage import io, filters
+import re
+import json
+from biotransistor import imagine
+from PIL import Image
+from mplex_image import process
+import matplotlib.pyplot as plt
+Image.MAX_IMAGE_PIXELS = 1000000000
+
+#functions
+def extract_feat(labels,intensity_image, properties=('centroid','mean_intensity','area','eccentricity')):
+    ''' 
+    given labels and intensity image, extract features to dataframe
+    '''
+    props = measure.regionprops_table(labels,intensity_image, properties=properties)
+    df_prop = pd.DataFrame(props)
+    return(df_prop)
+
+def expand_label(labels,distance=3):
+    '''
+    expand the nucelar labels by a fixed number of pixels
+    '''
+    boundaries = segmentation.find_boundaries(labels,mode='outer') #thick
+    shrunk_labels = labels.copy()
+    shrunk_labels[boundaries] = 0
+    background = shrunk_labels == 0
+    distances, (i, j) = scipy.ndimage.distance_transform_edt(
+                background, return_indices=True
+            )
+
+    grown_labels = labels.copy()
+    mask = background & (distances <= distance)
+    grown_labels[mask] = shrunk_labels[i[mask], j[mask]]
+    ring_labels = grown_labels - shrunk_labels
+
+    return(ring_labels, grown_labels) #shrunk_labels, grown_labels,
+
+def contract_label(labels,distance=3):
+    '''
+    contract labels by a fixed number of pixels
+    '''
+    boundaries = segmentation.find_boundaries(labels,mode='outer')
+    shrunk_labels = labels.copy()
+    shrunk_labels[boundaries] = 0
+    foreground = shrunk_labels != 0
+    distances, (i, j) = scipy.ndimage.distance_transform_edt(
+                     foreground, return_indices=True
+                 )
+
+    mask = foreground & (distances <= distance)
+    shrunk_labels[mask] = shrunk_labels[i[mask], j[mask]]
+    rim_labels = labels - shrunk_labels
+    return(rim_labels)
+
+def straddle_label(labels,distance=3):
+    '''
+    expand and contract labels by a fixed number of pixels
+    '''
+    boundaries = segmentation.find_boundaries(labels,mode='outer') #outer
+    shrunk_labels = labels.copy()
+    grown_labels = labels.copy()
+    shrunk_labels[boundaries] = 0
+    foreground = shrunk_labels != 0
+    background = shrunk_labels == 0
+    distances_f, (i, j) = scipy.ndimage.distance_transform_edt(
+                     foreground, return_indices=True
+                 )
+    distances_b, (i, j) = scipy.ndimage.distance_transform_edt(
+                background, return_indices=True
+            )
+    mask_f = foreground & (distances_f <= distance)
+    mask_b = background & (distances_b <= distance + 1)
+    shrunk_labels[mask_f] = 0
+    grown_labels[mask_b] = grown_labels[i[mask_b], j[mask_b]]
+    membrane_labels = grown_labels - shrunk_labels 
+    return(membrane_labels, grown_labels, shrunk_labels)
+
+def label_difference(labels,cell_labels):
+    '''
+    given matched nuclear and cell label IDs,return cell_labels minus labels
+    '''
+    overlap = cell_labels==labels
+    ring_rep = cell_labels.copy()
+    ring_rep[overlap] = 0
+    return(ring_rep)
+
+def get_mip(ls_img):
+    '''
+    maximum intensity projection of images (input list of filenames)
+    '''
+    imgs = []
+    for s_img in ls_img:
+        img = io.imread(s_img)
+        imgs.append(img)
+    mip = np.stack(imgs).max(axis=0)
+    return(mip)
+
+def thresh_li(img,area_threshold=100,low_thresh=1000):
+    '''
+    threshold an image with Li’s iterative Minimum Cross Entropy method
+    if too low, apply the low threshold instead (in case negative)
+    '''
+    mask = img >= filters.threshold_li(img)
+    mask = morphology.remove_small_holes(mask, area_threshold=area_threshold)
+    mask[mask < low_thresh] = 0
+    return(mask)
+
+def mask_border(mask,type='inner',pixel_distance = 50):
+    '''
+    for inner, distance transform from mask to background
+    for outer, distance transform from back ground to mask
+    returns a mask
+    '''
+    shrunk_mask = mask.copy()
+    if type == 'inner':
+        foreground = ~mask
+        background = mask
+    elif type == 'outer':
+        foreground = ~mask
+        background = mask
+    distances, (i, j) = scipy.ndimage.distance_transform_edt(
+                background, return_indices=True
+            )
+    maskdist = mask & (distances <= pixel_distance)
+    shrunk_mask[maskdist] = shrunk_mask[i[maskdist], j[maskdist]]
+    mask_out = np.logical_and(mask,np.logical_not(shrunk_mask))
+    return(mask_out,shrunk_mask,maskdist,distances)
+
+def mask_labels(mask,labels):
+    ''''
+    return the labels that fall within the mask
+    '''
+    selected_array = labels[mask]
+    a_unique = np.unique(selected_array)
+    return(a_unique)
+
+def parse_org(s_end = "ORG.tif",s_start='R'):
+    """
+    This function will parse images following koei's naming convention
+    Example: Registered-R1_PCNA.CD8.PD1.CK19_Her2B-K157-Scene-002_c1_ORG.tif
+    The output is a dataframe with image filename in index
+    And rounds, color, imagetype, scene (/tissue), and marker in the columns
+    """
+    ls_file = []
+    for file in os.listdir():
+        if file.endswith(s_end):
+            if file.find(s_start)==0:
+                ls_file = ls_file + [file]
+    df_img = pd.DataFrame(index=ls_file)
+    df_img['rounds'] = [item.split('_')[0].split('Registered-')[1] for item in df_img.index]
+    df_img['color'] = [item.split('_')[-2] for item in df_img.index]
+    df_img['slide'] = [item.split('_')[2] for item in df_img.index]
+    df_img['scene'] = [item.split('-Scene-')[1] for item in df_img.slide]
+    #parse file name for biomarker
+    for s_index in df_img.index:
+        #print(s_index)
+        s_color = df_img.loc[s_index,'color']
+        if s_color == 'c1':
+            s_marker = 'DAPI'
+        elif s_color == 'c2':
+            s_marker = s_index.split('_')[1].split('.')[0]
+        elif s_color == 'c3':
+            s_marker = s_index.split('_')[1].split('.')[1]
+        elif s_color == 'c4':
+            s_marker = s_index.split('_')[1].split('.')[2]
+        elif s_color == 'c5':
+            s_marker = s_index.split('_')[1].split('.')[3]
+        elif s_color == 'c6':
+            s_marker = s_index.split('_')[1].split('.')[2]
+        elif s_color == 'c7':
+            s_marker = s_index.split('_')[1].split('.')[3]
+        else: print('Error')
+        df_img.loc[s_index,'marker'] = s_marker
+    return(df_img) 
+
+def extract_cellpose_features(s_sample, segdir, subdir, ls_seg_markers, nuc_diam, cell_diam,b_big=False): #,b_thresh=False
+    '''
+    load the segmentation results, the input images, and the channels images
+    extract mean intensity from each image, and centroid, area and eccentricity for 
+    '''
+
+    df_sample = pd.DataFrame()
+    df_thresh = pd.DataFrame()
+
+    os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+    ls_scene = []
+    d_match = {}
+    for s_file in os.listdir():
+        if s_file.find(f'{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split('_')[0])
+            d_match.update({s_file.split('_')[0]:s_file})
+        elif s_file.find(f'{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split('_')[0])
+            d_match.update({s_file.split('_')[0]:s_file})
+    for s_scene in ls_scene:
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+        print(f'processing {s_scene}')
+        for s_file in os.listdir():
+            if s_file.find(s_scene) > -1:
+                if s_file.find("DAPI.png") > -1:
+                    s_dapi = s_file
+        dapi = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{s_dapi}')
+        print(f'loading {s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        labels = io.imread(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        cell_labels = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{d_match[s_scene]}')
+        print(f'loading {d_match[s_scene]}')
+        #nuclear features
+        df_feat = extract_feat(labels,dapi, properties=(['label']))
+        df_feat.columns = [f'{item}_segmented-nuclei' for item in df_feat.columns]
+        df_feat.index = [f'{s_sample}_scene{s_scene.split("-Scene-")[1].split("_")[0]}_cell{item}' for item in df_feat.loc[:,'label_segmented-nuclei']]
+
+        #get subcellular regions
+        cyto = label_difference(labels,cell_labels)
+        d_loc_nuc = subcellular_regions(labels, distance_short=2, distance_long=5)
+        d_loc_cell = subcellular_regions(cell_labels, distance_short=2, distance_long=5)
+        d_loc = {'nuclei':labels,'cell':cell_labels,'cytoplasm':cyto,
+         'nucmem':d_loc_nuc['membrane'][0],'cellmem':d_loc_cell['membrane'][0],
+         'perinuc5':d_loc_nuc['ring'][1],'exp5':d_loc_nuc['grown'][1],
+         'nucadj2':d_loc_nuc['straddle'][0],'celladj2':d_loc_cell['straddle'][0]}
+
+        #subdir organized by slide or scene
+        if os.path.exists(f'{subdir}/{s_sample}'):
+            os.chdir(f'{subdir}/{s_sample}')
+        elif os.path.exists(f'{subdir}/{s_scene}'):
+            os.chdir(f'{subdir}/{s_scene}')
+        else:
+            os.chdir(f'{subdir}')
+        df_img = parse_org()
+        df_img['round_int'] = [int(re.sub('[^0-9]','', item)) for item in df_img.rounds] 
+        df_img = df_img[df_img.round_int < 90]
+        df_img = df_img.sort_values('round_int')
+        df_scene = df_img[df_img.scene==s_scene.split("-Scene-")[1].split("_")[0]]
+
+        #load each image
+        for s_index in df_scene.index:
+                intensity_image = io.imread(s_index)
+                df_thresh.loc[s_index,'threshold_li'] =  filters.threshold_li(intensity_image)
+                if intensity_image.mean() > 0:
+                    df_thresh.loc[s_index,'threshold_otsu'] = filters.threshold_otsu(intensity_image)
+                    df_thresh.loc[s_index,'threshold_triangle'] = filters.threshold_triangle(intensity_image)
+                #if b_thresh:
+                #    break
+                s_marker = df_scene.loc[s_index,'marker']
+                print(f'extracting features {s_marker}')
+                if s_marker == 'DAPI':
+                    s_marker = s_marker + f'{df_scene.loc[s_index,"rounds"].split("R")[1]}'
+                for s_loc, a_loc in d_loc.items():
+                    if s_loc == 'nuclei':
+                        df_marker_loc = extract_feat(a_loc,intensity_image, properties=(['mean_intensity','centroid','area','eccentricity','label']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_centroid-0',f'{s_marker}_{s_loc}_centroid-1',f'{s_marker}_{s_loc}_area',f'{s_marker}_{s_loc}_eccentricity',f'{s_marker}_{s_loc}_label']
+                    elif s_loc == 'cell':
+                        df_marker_loc = extract_feat(a_loc,intensity_image, properties=(['mean_intensity','euler_number','area','eccentricity','label']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_euler',f'{s_marker}_{s_loc}_area',f'{s_marker}_{s_loc}_eccentricity',f'{s_marker}_{s_loc}_label']
+                    else:
+                        df_marker_loc = extract_feat(a_loc,intensity_image, properties=(['mean_intensity','label']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_label']
+                    #drop zero from array, set array ids as index
+                    #old df_marker_loc.index = sorted(np.unique(a_loc)[1::])
+                    df_marker_loc.index = df_marker_loc.loc[:,f'{s_marker}_{s_loc}_label']
+                    df_marker_loc.index = [f'{s_sample}_scene{s_scene.split("-Scene-")[1].split("_")[0]}_cell{item}' for item in df_marker_loc.index]
+                    df_feat = df_feat.merge(df_marker_loc, left_index=True,right_index=True,how='left',suffixes=('',f'{s_marker}_{s_loc}'))
+        if b_big:
+            df_feat.to_csv(f'{segdir}/{s_sample}Cellpose_Segmentation/features_{s_sample}-{s_scene}.csv')
+        df_sample = df_sample.append(df_feat)
+    return(df_sample, df_thresh)
+
+def extract_bright_features(s_sample, segdir, subdir, ls_seg_markers, nuc_diam, cell_diam,ls_membrane):
+    '''
+    load the features, segmentation results, the input images, and the channels images
+    extract mean intensity of the top 25% of pixel in from each label region
+    '''
+    df_sample = pd.DataFrame()
+    os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+    ls_scene = []
+    d_match = {}
+    for s_file in os.listdir():
+        if s_file.find(f'{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split('_')[0])
+            d_match.update({s_file.split('_')[0]:s_file})
+        elif s_file.find(f'{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split('_')[0])
+            d_match.update({s_file.split('_')[0]:s_file})
+    for s_scene in ls_scene:
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+        print(f'processing {s_scene}')
+        for s_file in os.listdir():
+            if s_file.find(s_scene) > -1:
+                if s_file.find("DAPI.png") > -1:
+                    s_dapi = s_file
+        dapi = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{s_dapi}')
+        print(f'loading {s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        labels = io.imread(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        print(labels.shape)
+        cell_labels = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{d_match[s_scene]}')
+        print(cell_labels.shape)
+        print(f'loading {d_match[s_scene]}')
+        #nuclear features
+        df_feat = extract_feat(labels,dapi, properties=(['label']))
+        df_feat.columns = [f'{item}_segmented-nuclei' for item in df_feat.columns]
+        df_feat.index = [f'{s_sample}_scene{s_scene.split("-Scene-")[1].split("_")[0]}_cell{item}' for item in df_feat.loc[:,'label_segmented-nuclei']]
+
+        #get subcellular regions
+        d_loc_nuc = subcellular_regions(labels, distance_short=2, distance_long=5)
+        d_loc_cell = subcellular_regions(cell_labels, distance_short=2, distance_long=5)
+        d_loc = {'nucmem25':d_loc_nuc['membrane'][0],'exp5nucmembrane25':d_loc_nuc['grown'][1],
+            'cellmem25':d_loc_cell['membrane'][0],'nuclei25':labels}
+
+        #subdir organized by slide or scene
+        if os.path.exists(f'{subdir}/{s_sample}'):
+            os.chdir(f'{subdir}/{s_sample}')
+        elif os.path.exists(f'{subdir}/{s_scene}'):
+            os.chdir(f'{subdir}/{s_scene}')
+        else:
+            os.chdir(f'{subdir}')
+        df_img = parse_org()
+        df_img['round_int'] = [int(re.sub('[^0-9]','', item)) for item in df_img.rounds] 
+        df_img = df_img[df_img.round_int < 90]
+        df_img = df_img.sort_values('round_int')
+        df_scene = df_img[df_img.scene==s_scene.split("-Scene-")[1].split("_")[0]]
+        df_marker = df_scene[df_scene.marker.isin(ls_membrane)]
+        #load each image
+        for s_index in df_marker.index:
+                print(f'loading {s_index}')
+                intensity_image = io.imread(s_index)
+                #print(intensity_image.shape)
+                s_marker = df_marker.loc[s_index,'marker']
+                print(f'extracting features {s_marker}')
+                if s_marker == 'DAPI':
+                    s_marker = s_marker + f'{df_marker.loc[s_index,"rounds"].split("R")[1]}'
+                for s_loc, a_loc in d_loc.items():
+                    #print(a_loc.shape)
+                    df_marker_loc = pd.DataFrame(columns = [f'{s_marker}_{s_loc}'])
+                    df_prop = extract_feat(a_loc,intensity_image, properties=(['intensity_image','image','label']))
+                    for idx in df_prop.index:
+                        label_id = df_prop.loc[idx,'label']
+                        intensity_image_small = df_prop.loc[idx,'intensity_image']
+                        image = df_prop.loc[idx,'image']
+                        pixels = intensity_image_small[image]
+                        pixels25 = pixels[pixels >= np.quantile(pixels,.75)]
+                        df_marker_loc.loc[label_id,f'{s_marker}_{s_loc}'] = pixels25.mean()
+                    df_marker_loc.index = [f'{s_sample}_scene{s_scene.split("-Scene-")[1].split("_")[0]}_cell{item}' for item in df_marker_loc.index]
+                    df_feat = df_feat.merge(df_marker_loc, left_index=True,right_index=True,how='left',suffixes=('',f'{s_marker}_{s_loc}'))
+        df_sample = df_sample.append(df_feat)
+        #break
+    return(df_sample)
+
+def subcellular_regions(labels, distance_short=2, distance_long=5):
+    '''
+    calculate subcellular segmentation regions from segmentation mask
+    '''
+    membrane_short = contract_label(labels,distance=distance_short)
+    membrane_long = contract_label(labels,distance=distance_long)
+    ring_short, grown_short = expand_label(labels,distance=distance_short)
+    ring_long, grown_long = expand_label(labels,distance=distance_long)
+    straddle_short, __, shrink_short = straddle_label(labels,distance=distance_short)
+    straddle_long, __, shrink_long = straddle_label(labels,distance=distance_long)
+    d_loc_sl={'membrane':(membrane_short,membrane_long),
+     'ring':(ring_short,ring_long),
+     'straddle':(straddle_short,straddle_long),
+     'grown':(grown_short,grown_long),
+     'shrunk':(shrink_short,shrink_long)}
+    return(d_loc_sl)
+ 
+def combine_labels(s_sample,segdir, subdir, ls_seg_markers, nuc_diam, cell_diam, df_mi_full,s_thresh):
+    '''
+    - load cell labels; delete cells that were not used for cytoplasm (i.e. ecad neg)
+    - nuc labels, expand to perinuc 5 and then cut out the cell labels
+    - keep track of cells that are completely coverd by another cell (or two or three: counts as touching).
+    '''
+    se_neg = df_mi_full[df_mi_full.slide == s_sample].loc[:,f'{s_thresh}_negative']
+    print(len(se_neg))
+    dd_result = {}
+    if os.path.exists(f'{segdir}/{s_sample}Cellpose_Segmentation'):
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+    else:
+        os.chdir(segdir)
+        print(segdir)
+    ls_scene = []
+    for s_file in os.listdir():
+        if s_file.find(' - DAPI.png') > -1:
+            ls_scene.append(s_file.split(' - DAPI.png')[0])
+    ls_scene_all = sorted(set([item.split('_cell')[0].replace('_scene','-Scene-') for item in se_neg.index]) & set(ls_scene))
+    if len(ls_scene_all) == 0:
+        ls_scene_all = sorted(set([item.split('_cell')[0].replace('_scene','-Scene-').split('_')[1] for item in se_neg.index]) & set(ls_scene))
+    print(ls_scene_all)
+    for s_scene in ls_scene_all:
+        se_neg_scene = se_neg[se_neg.index.str.contains(s_scene.replace("Scene ","scene")) | se_neg.index.str.contains(s_scene.replace("-Scene-","_scene"))]
+        print(f'Processing combined segmentaiton labels for {s_scene}')
+        if os.path.exists(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif'):
+            labels = io.imread(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        else:
+            print('no nuclei labels found')
+        if os.path.exists(f'{s_scene} matchedcell{cell_diam} - Cell Segmentation Basins.tif'):
+            cell_labels = io.imread(f'{s_scene} matchedcell{cell_diam} - Cell Segmentation Basins.tif')
+        elif os.path.exists(f'{s_scene}_{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins.tif'):
+            cell_labels = io.imread(f'{s_scene}_{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins.tif')
+        elif os.path.exists(f'{s_scene}_{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins.tif'):
+            cell_labels = io.imread(f'{s_scene}_{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins.tif')
+        else:
+            print('no cell labels found')
+        #set non-ecad cell labels to zero
+        a_zeros = np.array([int(item.split('_cell')[1]) for item in se_neg_scene[se_neg_scene].index]).astype('int64')
+        mask = np.isin(cell_labels, a_zeros)
+        cell_labels_copy = cell_labels.copy()
+        cell_labels_copy[mask] = 0
+        #make the nuclei under cells zero
+        labels_copy = labels.copy()
+        distance = 5
+        perinuc5, labels_exp = expand_label(labels,distance=distance)
+        labels_exp[cell_labels_copy > 0] = 0
+        #combine calls and expanded nuclei
+        combine = (labels_exp + cell_labels_copy)
+        if s_scene.find('Scene') == 0:
+            io.imsave(f'{s_sample}_{s_scene.replace("Scene ","scene")}_cell{cell_diam}_nuc{nuc_diam}_CombinedSegmentationBasins.tif',combine)
+        else:
+            io.imsave(f'{s_scene}_{".".join(ls_seg_markers)}-cell{cell_diam}_exp{distance}_CellSegmentationBasins.tif',combine)
+        #figure out the covered cells...labels + combined
+        not_zero_pixels =  np.array([labels.ravel() !=0,combine.ravel() !=0]).all(axis=0)
+        a_tups = np.array([combine.ravel()[not_zero_pixels],labels.ravel()[not_zero_pixels]]).T #combined over nuclei
+        unique_rows = np.unique(a_tups, axis=0)
+        new_dict = {}
+        for key, value in unique_rows:
+            if key == value:
+                continue
+            else:
+                if key in new_dict:
+                    new_dict[key].append(value)
+                else:
+                    new_dict[key] = [value]
+        #from elmar (reformat cells touching dictionary and save
+        d_result = {}
+        for i_cell, li_touch in new_dict.items():
+            d_result.update({str(i_cell): [str(i_touch) for i_touch in li_touch]})
+        dd_result.update({f'{s_sample}_{s_scene.replace("Scene ","scene")}':d_result})
+    #save dd_touch as json file
+    with open(f'result_{s_sample}_cellsatop_dictionary.json','w') as f: 
+        json.dump(dd_result, f)
+    print('')
+    return(labels,combine,dd_result)
+
+def check_basins(cell_labels, cell_diam):
+    dai_value = {'a':cell_labels}
+    df = imagine.membrane_px(cell_labels,dai_value)
+    ls_bad = sorted(set(df[df.x_relative > 10*cell_diam].cell) | set(df[df.y_relative > 10*cell_diam].cell))
+    return(ls_bad)
+
+def check_combined(segdir,s_sample,cell_diam,ls_seg_markers):
+    df_result = pd.DataFrame()
+    if os.path.exists(f'{segdir}/{s_sample}Cellpose_Segmentation'):
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+    else:
+        os.chdir(segdir)
+    ls_scene = []
+    for s_file in os.listdir():
+        if s_file.find(' - DAPI.png') > -1:
+            ls_scene.append(s_file.split(' - DAPI.png')[0])
+    for s_scene in sorted(ls_scene):
+        print(s_scene)
+        if os.path.exists(f'{s_scene}_{".".join(ls_seg_markers)}-cell{cell_diam}_exp5_CellSegmentationBasins.tif'):
+            cell_labels = io.imread(f'{s_scene}_{".".join(ls_seg_markers)}-cell{cell_diam}_exp5_CellSegmentationBasins.tif')
+            print(f'Loaded {s_scene}_{".".join(ls_seg_markers)}-cell{cell_diam}_exp5_CellSegmentationBasins.tif')
+            ls_bad = check_basins(cell_labels, cell_diam)
+            ls_bad_cells = [f"{s_scene.replace('-Scene-','_scene')}_cell{item}" for item in ls_bad]
+            df_bad = pd.DataFrame(index=ls_bad_cells,columns=['bad_match'],data=[True]*len(ls_bad_cells))
+            df_result = df_result.append(df_bad)
+        else:
+            print('no combined cell labels found')
+    return(df_result)
+
+def edge_mask(s_sample,segdir,subdir,i_pixel=154, dapi_thresh=350,i_fill=50000,i_close=20):
+    '''
+    find edge of the tissue. first, find tissue by threshodling DAPI R1 (pixels above dapi_thresh)
+    then, mask all pixels within i_pixel distance of tissue border
+    return/save binary mask
+    '''
+    os.chdir(segdir)
+    df_img = process.load_li([s_sample],s_thresh='', man_thresh=100)
+    for s_scene in sorted(set(df_img.scene)):
+        print(f'Calculating tissue edge mask for Scene {s_scene}')
+        s_index = df_img[(df_img.scene == s_scene) & (df_img.rounds == 'R1') & (df_img.color =='c1')].index[0]
+        if os.path.exists(f'{subdir}/{s_sample}/{s_index}'):
+            img_dapi = io.imread(f'{subdir}/{s_sample}/{s_index}')
+        elif os.path.exists(f'{subdir}/{s_sample}-Scene-{s_scene}/{s_index}'):
+            img_dapi = io.imread(f'{subdir}/{s_sample}-Scene-{s_scene}/{s_index}')
+        else:
+            print('no DAPI found')
+            img_dapi = np.zeros([2,2])
+        mask = img_dapi > dapi_thresh 
+        mask_small = morphology.remove_small_objects(mask, min_size=100)
+        mask_closed = morphology.binary_closing(mask_small, morphology.octagon(i_close,i_close//2))
+        mask_filled = morphology.remove_small_holes(mask_closed, i_fill)
+        border_mask, __, __,distances = mask_border(mask_filled,type='inner',pixel_distance = i_pixel)
+        img = np.zeros(border_mask.shape,dtype='uint8')
+        img[border_mask] = 255
+        io.imsave(f"{segdir}/TissueEdgeMask{i_pixel}_{s_sample}_scene{s_scene}.png", img)
+
+def edge_hull(s_sample,segdir,subdir,i_pixel=154, dapi_thresh=350,i_fill=50000,i_close=40,i_small=30000):
+    '''
+    find edge of the tissue. first, find tissue by threshodling DAPI R1 (pixels above dapi_thresh)
+    then, mask all pixels within i_pixel distance of tissue border
+    return/save binary mask
+    '''
+    os.chdir(segdir)
+    df_img = process.load_li([s_sample],s_thresh='', man_thresh=100)
+    for s_scene in sorted(set(df_img.scene)):
+        print(f'Calculating tissue edge mask for Scene {s_scene}')
+        s_index = df_img[(df_img.scene == s_scene) & (df_img.rounds == 'R1') & (df_img.color =='c1')].index[0]
+        if os.path.exists(f'{subdir}/{s_sample}/{s_index}'):
+            img_dapi = io.imread(f'{subdir}/{s_sample}/{s_index}')
+        elif os.path.exists(f'{subdir}/{s_sample}-Scene-{s_scene}/{s_index}'):
+            img_dapi = io.imread(f'{subdir}/{s_sample}-Scene-{s_scene}/{s_index}')
+        else:
+            print('no DAPI found')
+            img_dapi = np.zeros([2,2])
+        mask = img_dapi > dapi_thresh 
+        mask_small = morphology.remove_small_objects(mask, min_size=100)
+        mask_closed = morphology.binary_closing(mask_small, morphology.octagon(i_close,i_close//2))
+        mask_filled = morphology.remove_small_holes(mask_closed, i_fill)
+        mask_smaller = morphology.remove_small_objects(mask, min_size=i_small)
+        mask_hull = morphology.convex_hull_image(mask_smaller)
+        border_mask, __, __,distances = mask_border(mask_filled,type='inner',pixel_distance = i_pixel)
+        img = np.zeros(border_mask.shape,dtype='uint8')
+        img[border_mask] = 255
+        io.imsave(f"{segdir}/TissueEdgeMask{i_pixel}_{s_sample}_scene{s_scene}.png", img)
+
+def edge_cells(s_sample,segdir,nuc_diam,i_pixel=154):
+    '''
+    load a binary mask of tissue, cell labels, and xy coord datafreame.
+    return data frame of cells witin binary mask
+    '''
+    df_sample = pd.DataFrame()
+    #load xy
+    df_xy = pd.read_csv(f'{segdir}/features_{s_sample}_CentroidXY.csv',index_col=0)
+    df_xy['cells'] = [int(item.split('cell')[1]) for item in df_xy.index]
+    ls_scene = sorted(set([item.split('_')[1].split('scene')[1] for item in df_xy.index]))
+    #load masks
+    for s_scene in ls_scene:
+        print(f'Calculating edge cells for Scene {s_scene}')
+        mask = io.imread(f"{segdir}/TissueEdgeMask{i_pixel}_{s_sample}_scene{s_scene}.png")
+        mask_gray = mask == 255
+        labels = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{s_sample}-Scene-{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        edge = mask_labels(mask_gray,labels)
+        df_scene = df_xy[df_xy.index.str.contains(f'{s_sample}_scene{s_scene}')]
+        #works
+        es_cells = set(edge.astype('int')).intersection(set(df_scene.cells))
+        df_edge = df_scene[df_scene.cells.isin(es_cells)]
+        fig,ax=plt.subplots()
+        ax.imshow(mask_gray)
+        ax.scatter(df_edge.DAPI_X,df_edge.DAPI_Y,s=1)
+        fig.savefig(f'{segdir}/TissueEdgeMask{i_pixel}_{s_sample}-Scene-{s_scene}_cells.png')
+        df_sample = df_sample.append(df_edge)
+    return(df_sample)
+
+def cell_distances(df_xy,s_scene,distances):
+    '''
+    load a binary mask of tissue, cell labels, and xy coord datafreame.
+    return data frame of cells witin binary mask
+    '''
+    df_xy['DAPI_Y'] = df_xy.DAPI_Y.astype('int64')
+    df_xy['DAPI_X'] = df_xy.DAPI_X.astype('int64')
+    print(f'Calculating distances for Scene {s_scene}')
+    df_scene = df_xy[df_xy.index.str.contains(f"{s_scene.replace('-Scene-','_scene')}")].copy()
+    df_scene['pixel_dist'] = distances[df_scene.DAPI_Y,df_scene.DAPI_X]
+    return(df_scene)
+
+def cell_coords():
+    '''
+    TBD: find cell coordinate within a mask
+
+    NOTE(review): unfinished placeholder. ls_scene, points and coords are not
+    defined in this function (the lines that would build points and coords are
+    commented out below), so a call is expected to fail unless those names
+    exist as module globals - confirm before use.
+    '''
+    for s_scene in ls_scene:
+        #old (use if you have coordinates, not labels)
+        #mask_gray = mask#[:,:,0]
+        #contour = skimage.measure.find_contours(mask_gray,0)
+        #coords = skimage.measure.approximate_polygon(contour[0], tolerance=5)
+        #fig,ax=plt.subplots()
+        #ax.imshow(mask_gray)
+        #ax.plot(coords[:, 1], coords[:, 0], '-r', linewidth=2)
+        #fig.savefig(f'TissueEdgeMask_{s_sample}_Scene-{s_scene}_polygon.png')
+        #x = np.array(df_scene.DAPI_X.astype('int').values)
+        #y = np.array(df_scene.DAPI_Y.astype('int').values)
+        #points = np.array((y,x)).T
+        mask = skimage.measure.points_in_poly(points, coords)
\ No newline at end of file
diff --git a/mplex_image/gating.py b/mplex_image/gating.py
new file mode 100755
index 0000000..a3665fc
--- /dev/null
+++ b/mplex_image/gating.py
@@ -0,0 +1,205 @@
+#####
+# gating.py
+# author:  engje, grael
+# date: 2020-04-07
+# license: GPLv3
+#####
+
+# library
+import os
+import pandas as pd
+import shutil
+from mplex_image import analyze
+import numpy as np
+
+
+def main_celltypes(df_data,ls_endothelial,ls_immune,ls_tumor,ls_cellline_index):
+    #celltpye
+    #1 endothelial
+    df_data['endothelial'] = df_data.loc[:,ls_endothelial].any(axis=1)
+    #2 immune
+    ls_exclude = ls_endothelial 
+    df_data['immune'] = df_data.loc[:,ls_immune].any(axis=1) & ~df_data.loc[:,ls_exclude].any(axis=1)
+    #3 tumor
+    ls_exclude =  ls_endothelial + ls_immune
+    df_data['tumor'] = df_data.loc[:,ls_tumor].any(axis=1) & ~df_data.loc[:,ls_exclude].any(axis=1) 
+    #4 stromal
+    ls_exclude = ls_immune + ls_endothelial + ls_tumor
+    df_data['stromal'] = ~df_data.loc[:,ls_exclude].any(axis=1)
+    #add celltype
+    ls_cell_names = ['stromal','endothelial','tumor','immune']
+    s_type_name = 'celltype'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    #fix cell lines (all tumor!)
+    df_data['slide_scene'] = [item.split('_cell')[0] for item in df_data.index]
+    df_data.loc[df_data[df_data.slide_scene.isin(ls_cellline_index)].index,'celltype'] = 'tumor'
+    df_data['immune'] = df_data.loc[:,'celltype'] == 'immune'
+    df_data['stromal'] = df_data.loc[:,'celltype'] == 'stromal'
+    df_data['endothelial'] = df_data.loc[:,'celltype'] == 'endothelial'
+    return(df_data)
+
+def proliferation(df_data,ls_prolif):
+    #proliferation
+    df_data['prolif'] = df_data.loc[:,ls_prolif].any(axis=1)
+    df_data['nonprolif'] = ~df_data.loc[:,ls_prolif].any(axis=1)
+    #add proliferation
+    ls_cell_names = ['prolif','nonprolif']
+    s_type_name = 'proliferation'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    return(df_data)
+
+def immune_types(df_data,s_myeloid,s_bcell,s_tcell):
+    ## T cell, B cell or myeloid
+    df_data['CD68Mac'] = df_data.loc[:,[s_myeloid,'immune']].all(axis=1) 
+    df_data['CD20Bcell'] = df_data.loc[:,[s_bcell,'immune']].all(axis=1) & ~df_data.loc[:,['CD68Mac',s_tcell]].any(axis=1)
+    df_data['TcellImmune'] = df_data.loc[:,[s_tcell,'immune']].all(axis=1) & ~df_data.loc[:,['CD20Bcell','CD68Mac']].any(axis=1)
+    df_data['UnspecifiedImmune'] = df_data.loc[:,'immune'] & ~df_data.loc[:,['CD20Bcell','TcellImmune','CD68Mac']].any(axis=1)
+    ## CD4 and CD8 
+    if df_data.columns.isin(['CD8_Ring','CD4_Ring']).sum()==2:
+        #print('CD4 AND CD8')
+        df_data['CD8Tcell'] = df_data.loc[: ,['CD8_Ring','TcellImmune']].all(axis=1)
+        df_data['CD4Tcell'] = df_data.loc[: ,['CD4_Ring','TcellImmune']].all(axis=1) & ~df_data.loc[: ,'CD8Tcell']
+        df_data['UnspecifiedTcell'] = df_data.TcellImmune & ~df_data.loc[:,['CD8Tcell','CD4Tcell']].any(axis=1) #if cd4 or 8 then sum = 2
+        ## check
+        ls_immune = df_data[df_data.loc[:,'TcellImmune']].index.tolist()
+        if ((df_data.loc[ls_immune,['CD8Tcell','CD4Tcell','UnspecifiedTcell']].sum(axis=1)!=1)).any():
+            print('Error in Tcell cell types')
+        ls_immuntype = ['CD68Mac','CD20Bcell','UnspecifiedImmune','CD8Tcell','CD4Tcell','UnspecifiedTcell'] #'TcellImmune',
+    #add Immunetype
+    ls_cell_names = ls_immuntype
+    s_type_name = 'ImmuneType'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+
+    #get rid of unspecfied immune cells (make them stroma)
+    ls_index = df_data[df_data.ImmuneType.fillna('x').str.contains('Unspecified')].index
+    df_data.loc[ls_index,'celltype'] = 'stromal'
+    df_data.loc[ls_index,'ImmuneType'] = np.nan
+    df_data.loc[ls_index,'stromal'] = True
+    df_data.loc[ls_index,'immune'] = False
+    return(df_data)
+
+def immune_functional(df_data,ls_immune_functional):
+    #Immune functional states 
+    df_data.rename(dict(zip(ls_immune_functional,[item.split('_')[0] for item in ls_immune_functional])),axis=1,inplace=True)
+    df_func = analyze.combinations(df_data,[item.split('_')[0] for item in ls_immune_functional])
+    df_data = df_data.merge(df_func,how='left', left_index=True, right_index=True, suffixes = ('_all',''))
+    #gated combinations: immune type plus fuctional status
+    ls_gate = sorted(df_data[~df_data.ImmuneType.isna()].loc[:,'ImmuneType'].unique())
+    ls_marker = df_func.columns.tolist()
+    df_gate_counts = analyze.gated_combinations(df_data,ls_gate,ls_marker)
+    df_data = df_data.merge(df_gate_counts, how='left', left_index=True, right_index=True,suffixes = ('_all',''))
+    #add FuncImmune
+    ls_cell_names = df_gate_counts.columns.tolist()
+    s_type_name ='FuncImmune'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    return(df_data)
+
+########################################
+#CellProlif combinations, main cell types and proliferation
+######################################
+def cell_prolif(df_data, s_gate='celltype',ls_combo =['prolif','nonprolif']):
+    ls_gate = df_data.loc[:,s_gate].unique().tolist()
+    df_gate_counts2 = analyze.gated_combinations(df_data,ls_gate,ls_combo)
+    df_data = df_data.merge(df_gate_counts2, how='left', left_index=True, right_index=True,suffixes = ('_all',''))
+    #add CellProlif
+    ls_cell_names = ['endothelial_prolif','endothelial_nonprolif', 'tumor_prolif', 'tumor_nonprolif',
+       'stromal_prolif', 'stromal_nonprolif', 'immune_prolif','immune_nonprolif']
+    ls_cell_names = df_gate_counts2.columns.tolist()
+    s_type_name = 'CellProlif'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    return(df_data)
+
+def diff_hr_state(df_data,ls_luminal,ls_basal,ls_mes):
+    '''
+    gate tumor differentiation state and hormone receptor status.
+
+    input:
+        df_data: dataframe with boolean tumor, ER_Nuclei and HER2_Ring columns (PgR_Nuclei optional),
+            a celltype column, and the boolean marker columns listed below
+        ls_luminal, ls_basal: marker columns defining luminal and basal differentiation
+        ls_mes: mesenchymal marker columns; only boolean columns that match a name in the
+            list, or the part of a name before the first '_', are used
+
+    output:
+        merged dataframe with boolean Lum, Bas, Mes, ER, HER2, HR (and PR) columns and the
+        DiffState, HRStatus and DiffStateHRStatus columns, which are set to NaN
+        for cells whose celltype is not tumor
+    '''
+    #keep only the mesenchymal gates that exist as boolean columns
+    ls_mes = df_data.columns[(df_data.dtypes=='bool') & (df_data.columns.isin(ls_mes) | df_data.columns.isin([item.split('_')[0] for item in ls_mes]))].tolist()
+    print('differentiation')
+    df_data['Lum'] = df_data.loc[:,ls_luminal].any(axis=1) & df_data.tumor
+    df_data['Bas'] = df_data.loc[:,ls_basal].any(axis=1)  & df_data.tumor
+    df_data['Mes'] = df_data.loc[:,ls_mes].any(axis=1) & df_data.tumor
+
+    print('hormonal status')
+    df_data['ER'] = df_data.loc[:,['tumor','ER_Nuclei']].all(axis=1)
+    df_data['HER2'] = df_data.loc[:,['tumor','HER2_Ring']].all(axis=1)
+    ls_hr = ['ER']
+    #PR is optional: only used if the PgR_Nuclei gate exists
+    if df_data.columns.isin(['PgR_Nuclei']).any():
+        df_data['PR'] = df_data.loc[:,['tumor','PgR_Nuclei']].all(axis=1)
+        ls_hr.append('PR')
+
+    #HR positive: ER or (if available) PR positive tumor cell
+    df_data['HR'] = df_data.loc[:,ls_hr].any(axis=1) & df_data.tumor
+
+    #1 differentiation state: combinations of Lum, Bas and Mes
+    ls_marker = ['Lum','Bas','Mes'] #
+    df_diff = analyze.combinations(df_data,ls_marker)
+    df_data = df_data.merge(df_diff,how='left', left_index=True, right_index=True, suffixes = ('_all',''))
+
+    #add DiffState
+    ls_cell_names = df_diff.columns.tolist()
+    s_type_name = 'DiffState'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    #change non-tumor to NA (works!)
+    df_data.loc[df_data[df_data.celltype != 'tumor'].index,s_type_name] = np.nan
+
+    #2 ER/PR/HER2
+    ls_marker =  ['HR','HER2']
+    df_hr = analyze.combinations(df_data,ls_marker)
+    #the '__' combination column is renamed to TN
+    df_hr.rename({'__':'TN'},axis=1,inplace=True)
+    df_data = df_data.merge(df_hr,how='left', left_index=True, right_index=True,suffixes = ('_all',''))
+    ls_cell_names = df_hr.columns.tolist()
+    s_type_name = 'HRStatus'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    #change non-tumor to NA (works!)
+    df_data.loc[df_data[df_data.celltype != 'tumor'].index,s_type_name] = np.nan
+
+    #3 combinations: differentiation and HR status
+    ls_gate = df_diff.columns.tolist()
+    ls_marker = df_hr.columns.tolist()
+    df_gate_counts = analyze.gated_combinations(df_data,ls_gate,ls_marker)
+    df_data = df_data.merge(df_gate_counts, how='left', left_index=True, right_index=True,suffixes = ('_all',''))
+
+    # make Tumor Diff plus HR Status object column
+    ls_cell_names =  df_gate_counts.columns.tolist()
+    s_type_name = 'DiffStateHRStatus'
+    analyze.add_celltype(df_data, ls_cell_names, s_type_name)
+    #change non-tumor to NA (works!)
+    df_data.loc[df_data[df_data.celltype != 'tumor'].index,s_type_name] = np.nan
+    return(df_data)
+
+def celltype_gates(df_data,ls_gate,s_new_name,s_celltype):
+    '''
+    multipurpose for stromaTumor
+    ls_gates = 
+    '''
+    ls_gate = df_data.columns[(df_data.dtypes=='bool') & (df_data.columns.isin(ls_gate) | df_data.columns.isin([item.split('_')[0] for item in ls_gate]))].tolist()
+    #tumor signaling and proliferation
+    #rename
+    df_data.rename(dict(zip(ls_gate,[item.split('_')[0] for item in ls_gate])),axis=1,inplace=True)
+    ls_marker = [item.split('_')[0] for item in ls_gate]
+    #functional states (stromal) (don't forget to merge!)
+    df_func = analyze.combinations(df_data,ls_marker)
+    df_data = df_data.merge(df_func,how='left', left_index=True, right_index=True, suffixes = ('_all',''))
+    ls_cell_names = df_func.columns.tolist()
+    analyze.add_celltype(df_data, ls_cell_names, s_new_name)
+    #change non-tumor to NA (works!)
+    df_data.loc[df_data[df_data.celltype != s_celltype].index,s_new_name] = np.nan
+    df_data[s_new_name] = df_data.loc[:,s_new_name].replace(dict(zip(ls_cell_names,[f'{s_celltype}_{item}' for item in ls_cell_names])))
+    return(df_data)
+
+def non_tumor(df_data):
+    #one more column: all non-tumor cells
+    index_endothelial = df_data[df_data.celltype=='endothelial'].index
+    index_immune = df_data[df_data.celltype=='immune'].index
+    index_stroma = df_data[df_data.celltype=='stromal'].index
+    index_tumor = df_data[df_data.celltype=='tumor'].index
+
+    if df_data.columns.isin(['ImmuneType','StromalType']).sum() == 2:
+        #fewer cell tpyes
+        df_data.loc[index_endothelial,'NonTumor'] = 'endothelial'
+        df_data.loc[index_immune,'NonTumor'] = df_data.loc[index_immune,'ImmuneType']
+        df_data.loc[index_stroma,'NonTumor'] = df_data.loc[index_stroma,'StromalType']
+        df_data.loc[index_tumor,'NonTumor'] = np.nan
+
+        if df_data.columns.isin(['FuncImmune','CellProlif']).sum() == 2:
+            #more cell types
+            df_data.loc[index_endothelial,'NonTumorFunc'] = df_data.loc[index_endothelial,'CellProlif']
+            df_data.loc[index_immune,'NonTumorFunc'] = df_data.loc[index_immune,'FuncImmune']
+            df_data.loc[index_stroma,'NonTumorFunc'] = df_data.loc[index_stroma,'StromalType']
+            df_data.loc[index_tumor,'NonTumorFunc'] = np.nan
+    return(df_data)
diff --git a/mplex_image/getdata.py b/mplex_image/getdata.py
new file mode 100755
index 0000000..aca70dc
--- /dev/null
+++ b/mplex_image/getdata.py
@@ -0,0 +1,176 @@
+####
+# title: getdata.py
+#
+# language: Python3.6
+# date: 2018-08-00
+# license: GPL>=v3
+# author: Jenny, bue (mostly bue)
+#
+# description:
+#   python3 library to analyze Guillaume-segmented cyclic staining data.
+####
+
+# load library
+import csv
+import os
+import re
+
+
+# function implementation
+# import importlib
+# importlib.reload(getdata)
+
+def get_df(
+        #s_gseg_folder_root='/graylab/share/engje/Data/',
+        #s_scene_label='Registered-Her'
+        s_folder_regex="^SlideName.*_Features$",
+        es_value_label = {"MeanIntensity","CentroidX","CentroidY"},
+        #s_df_folder_root="./",
+        #b_roundscycles=False,
+    ):
+    '''
+    input:
+        segmentation fiels from Guillaume's software, which have in the
+        "Label" column the "cell serial number" (cell)
+        and in other columns the "feature of intrests" and unintrest.
+
+        the segmentation files are ordered in such a path structure:
+        + {s_gseg_folder_root}
+            |+ {s_gseg_folder_run_regex}*_YYYY-MM-DD_*  (run)
+            |    |+ Scene 000 - Nuclei - CD32.txt (scene and protein)
+            |    |+ Scene 000 - Location - ProteinName.txt
+            |
+            |+ {s_gseg_folder_run_regex}*_YYYY-MM-DD_*
+
+    output:
+        at {s_df_folder_root} tab separated value dataframe files
+        per run and feature of intrest.
+        y-axis: protein_location
+        x-axis: scene_cell
+        + runYYYYMMDD_MeanIntensity.tsv
+        + runYYYYMMDD_{s_gseg_feature_label}.tsv
+
+    run:
+        import getdata
+        getdata.get_df(s_gseg_folder_root='ihcData', s_gseg_folder_run_regex='^BM-Her2N75')
+
+    description:
+        function to extrtact dataframe like files of features of intrest
+        from segmentation files from guilaumes segmentation software.
+    '''
+    # enter the data path
+    #os.chdir(s_gseg_folder_root)
+    
+    
+    # for each value label of intrest (such as MeanIntensity)
+    for s_value_label in es_value_label:
+
+        # for each run (such as folder BM-Her2N75-15_2017-08-07_Features)
+        # change re.search to somehow specify folder of interest
+        for s_dir in os.listdir():
+            if re.search(s_folder_regex, s_dir):
+                print(f"\nprocess {s_value_label} run: {s_dir}")
+                # enter the run directory
+                os.chdir(s_dir)
+                # extract run label from dir name
+                s_run = f"features_{s_dir.split('_')[0]}"
+                # get empty run dictionary
+                dd_run = {}
+
+                # for each data file
+                for s_file in os.listdir():
+                    if re.search("^Scene", s_file):
+                        print(f"process {s_value_label} file: {s_file} ...")
+                        # extract scene from file name
+                        ls_file = [s_splinter.strip() for s_splinter in s_file.split("-")] 
+                        s_scene = re.sub("[^0-9a-zA-Z]", "", ls_file[0].lower()) #take out any alpha numberic 
+                        # extract protein from file name
+                        if (len(ls_file) < 3):
+                            s_protein = f"{ls_file[1].split('.')[0]}" # this is dapi
+                        else:
+                            s_protein = f"{ls_file[2].split('.')[0]}_{ls_file[1]}" # others
+
+                        # for each datarow in file
+                        b_header = False  # header row inside file not yet found, so set flag false
+                        with open(s_file, newline='') as f_csv:
+                            o_reader = csv.reader(f_csv, delimiter=' ', quotechar='"')
+                            for ls_row in o_reader:
+                                if (b_header):
+                                    # extract  cell label and data vale
+                                    s_cell = ls_row[i_xcell]
+                                    s_cell = f"{'0'*(5 - len(s_cell))}{s_cell}"
+                                    o_value = ls_row[i_xvalue]
+                                    # update run dictionary via scene_cell dictionery (one scene_cell dictionary per dataframe row)
+                                    s_scene_cell = f"{s_scene}_cell{s_cell}"
+                                    try:
+                                        d_scene_cell = dd_run[s_scene_cell]  # we have already some data from this scene_cell
+                                    except KeyError:
+                                        d_scene_cell = {}  # this is the first time we deal with this scene_cell
+                                    # update scene_cell dictionary with data values (one value inside dataframe row)
+                                    try:
+                                        o_there = d_scene_cell[s_protein]
+                                        sys.exit(f"Error @ getDataframe : in run {s_run} code tries to populate dataframe row {s_scene_cell} column {s_protein} with a secound time (there:{o_there} new:{o_value}). this should never happen. code is messed up.")
+                                    except KeyError:
+                                        d_scene_cell.update({s_protein: o_value})
+                                        dd_run.update({s_scene_cell: d_scene_cell})
+                                else:
+                                    #  extract cell label and data value of intrest column position
+                                    i_xcell = ls_row.index("Label")
+                                    i_xvalue = ls_row.index(s_value_label)
+                                    b_header = True # header row found and information extracted, so set flag True
+
+                # write run dictionar of dictionary into dataframe like file
+                b_header = False
+                s_file_output = f"../{s_run}_{s_value_label}.tsv"
+                print(f"write file: {s_file_output}")
+                with open(s_file_output, 'w', newline='') as f:
+                    for s_scene_cell in sorted(dd_run):
+                        ls_datarow = [s_scene_cell]
+                        # handle protein column label row
+                        if not (b_header):
+                            ls_protein = sorted(dd_run[s_scene_cell])
+                            print(ls_protein)
+                            f.write("\t" + "\t".join(ls_protein) + "\n")
+                            b_header = True
+                        # handle data row
+                        for s_protein in ls_protein:
+                            o_value = dd_run[s_scene_cell][s_protein]
+                            ls_datarow.append(o_value)
+                        f.write("\t".join(ls_datarow) + "\n")
+                        # sanity check
+                        if (len(ls_protein) != (len(ls_datarow) -1)):
+                            sys.exit(f"Error @ getDataframe : at {s_scene_cell} there are {len(ls_datarow) - len(ls_protein) -1} more proteins then in the aready writen rows")
+
+                # jump back to the data path
+                os.chdir("..")
+
+    return(dd_run)
+
+
+def dfextract(df_origin, s_extract, axis=0):
+    '''
+    input:
+        df_origin: dataframe
+        s_extract: index or column marker to be extacted
+        axis: 0 specifies index to be extracted,
+          1 specifies columns to be extracted
+
+    output:
+        df_extract: extracted dataframe
+
+    run:
+        import cycnorm
+        cycnorm.dfyextract(df_scene, s_extract='CD74')
+        cycnorm.dfextract(df_run, s_scene='scene86')
+
+    description:
+        function can extract e.g.
+        specific scene datafarme from gseg2df generated run datafarme or
+        specific protein from a scene dataframe.
+    '''
+    if (axis == 0):
+        df_extract = df_origin.loc[df_origin.index.str.contains(s_extract),:]
+    else:
+        df_extract = df_origin.loc[:,df_origin.columns.str.contains(s_extract)]
+    # output
+    return(df_extract)
diff --git a/mplex_image/imagine.py b/mplex_image/imagine.py
new file mode 100755
index 0000000..f705318
--- /dev/null
+++ b/mplex_image/imagine.py
@@ -0,0 +1,504 @@
+###
+# title: pysci.imagine.py
+#
+# language Python3
+# license: GPLv3
+# author: bue
+# date: 2019-01-31
+#
+# run:
+#    from pysci import imagine
+#
+# description:
+#    my image analysis library
+####
+
+# library
+import numpy as np
+import pandas as pd
+
+# function
+def slide_up(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one row up.
+        top row get deleted,
+        bottom row of zeros is inserted.
+
+    description:
+      inspired by np.roll function, though elements that roll
+      beyond the last position are not re-introduced at the first.
+    """
+    a = np.delete(np.insert(a, -1, 0, axis=0), 0, axis=0)
+    return(a)
+
+
+def slide_down(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one row down.
+        top row of zeros is inserted.
+        bottom row get deleted,
+
+    description:
+      inspired by np.roll function, though elements that roll
+      beyond the last position are not re-introduced at the first.
+    """
+    a = np.delete(np.insert(a, 0, 0, axis=0), -1, axis=0)
+    return(a)
+
+
+def slide_left(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one column left.
+        left most column gets deleted,
+        right most a column of zeros is inserted.
+
+    description:
+      inspired by np.roll function, though elements that roll
+      beyond the last position are not re-introduced at the first.
+    """
+    a = np.delete(np.insert(a, -1, 0, axis=1), 0, axis=1)
+    return(a)
+
+
+def slide_right(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one column right.
+        left most a column of zeros is inserted.
+        right most column gets deleted,
+
+    description:
+      inspired by np.roll function, though elements that roll
+      beyond the last position are not re-introduced at the first.
+    """
+    a = np.delete(np.insert(a, 0, 0, axis=1), -1, axis=1)
+    return(a)
+
+
+def slide_upleft(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one row up and one column left.
+
+    description:
+      inspired by np.roll function.
+    """
+    a = slide_left(slide_up(a))
+    return(a)
+
+
+def slide_upright(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one row up and one column right.
+
+    description:
+      inspired by np.roll function.
+    """
+    a = slide_right(slide_up(a))
+    return(a)
+
+
+def slide_downleft(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one row down  and one column left.
+
+    description:
+      inspired by np.roll function.
+    """
+    a = slide_left(slide_down(a))
+    return(a)
+
+
+def slide_downright(a):
+    """
+    input:
+      a: numpy array
+
+    output:
+      a: input numpy array shifted one row down and one column right.
+
+    description:
+      inspired by np.roll function.
+    """
+    a = slide_right(slide_down(a))
+    return(a)
+
+
+
+def get_border(ai_basin):
+    """
+    input:
+      ai_basin: numpy array representing a cells or nuclei basin file.
+        it is assumed that basin borders are represented by 0 values,
+        and basins are represented with any values different from 0.
+        ai_basin = skimage.io.imread("cells_basins.tif")
+
+    output:
+      ai_border: numpy array containing only the cell or nuclei basin border.
+        border value will be 1, non border value will be 0.
+
+    description:
+      algorithm to extract the basin borders form basin numpy arrays.
+    """
+    ab_border_up = (ai_basin - slide_up(ai_basin)) != 0
+    ab_border_down = (ai_basin - slide_down(ai_basin)) != 0
+    ab_border_left = (ai_basin - slide_left(ai_basin)) != 0
+    ab_border_right = (ai_basin - slide_right(ai_basin)) != 0
+    ab_border_upleft = (ai_basin - slide_upleft(ai_basin)) != 0
+    ab_border_upright = (ai_basin - slide_upright(ai_basin)) != 0
+    ab_border_downleft = (ai_basin - slide_downleft(ai_basin)) != 0
+    ab_border_downright = (ai_basin - slide_downright(ai_basin)) != 0
+    ab_border = ab_border_up | ab_border_down | ab_border_left | ab_border_right | ab_border_upleft | ab_border_upright | ab_border_downleft | ab_border_downright 
+    ai_border = ab_border * 1
+    return(ai_border)
+
+
+def collision(ai_basin, i_step_size=1):
+    """
+    input:
+      ai_basin: numpy array representing a cells basin file.
+        it is assumed that basin borders are represented by 0 values,
+        and basins are represented with any values different from 0.
+        ai_basin = skimage.io.imread("cells_basins.tif")
+
+    i_step_size: integer that specifies the distance from a basin
+        where collisions with other basins are detected.
+        increasing the step size behind > 1 will result in faster processing
+        but less certain results. step size < 1 make no sense.
+        default step size is 1.
+
+    output:
+        eti_collision: a set of tuples representing colliding basins.
+
+    description:
+        algorithm to detect which basin collide a given step size away.
+    """
+    eti_collision = set()
+    for o_slide in {slide_up, slide_down, slide_left, slide_right, slide_upleft, slide_upright, slide_downleft, slide_downright}:
+        ai_walk = ai_basin.copy()
+        for _ in range(i_step_size):
+            ai_walk = o_slide(ai_walk)
+        ai_alice = ai_walk[(ai_basin != 0) & (ai_walk != 0)]
+        ai_bob = ai_basin[(ai_basin != 0) & (ai_walk != 0)]
+        eti_collision = eti_collision.union(set(
+            zip(
+                ai_alice[(ai_alice != ai_bob)],
+                ai_bob[(ai_bob != ai_alice)]
+            )
+        ))
+    # return
+    return(eti_collision)
+
+
+def grow(ai_basin, i_step=1):
+    """
+    input:
+      ai_basin: numpy array representing a cells basin file.
+        it is assumed that basin borders are represented by 0 values,
+        and basins are represented with any values different from 0.
+        ai_basin = skimage.io.imread("cells_basins.tif")
+
+      i_step: integer which specifies how many pixels the basin
+        to  each direction should grow
+
+    output:
+      ai_grown: numpy array with the grown basins
+
+    description:
+      algorithm to grow the basis in a given basin numpy array.
+      growing happens counterclockwise.
+    """
+    ai_grown = ai_basin.copy()
+    for _ in range(i_step):
+        for o_slide in {slide_up, slide_upleft, slide_left, slide_downleft, slide_down, slide_downright, slide_right, slide_upright}:
+            ai_alice = ai_basin.copy()
+            ai_evolve = o_slide(ai_alice)
+            ai_alice[(ai_evolve != ai_alice) & (ai_alice == 0)] = ai_evolve[(ai_evolve != ai_alice) & (ai_alice == 0)]
+            # update grown
+            ai_grown[(ai_alice != ai_grown) & (ai_grown == 0)] = ai_alice[(ai_alice != ai_grown) & (ai_grown == 0)]
+    # output
+    return(ai_grown)
+
+
+def touching_cells(ai_basin, i_border_width=0, i_step_size=1):
+    """
+    input:
+      ai_basin: numpy array representing a cells basin file.
+        it is assumed that basin borders are represented by 0 values,
+        and basins are represented with any values different from 0.
+        ai_basin = skimage.io.imread("cells_basins.tif")
+
+      i_border_width: maximal acceptable border with in pixels.
+        this is half of the range how far two the adjacent cell maximal
+        can be apart and still are regarded as touching each other.
+
+      i_step_size: step size by which the border width is sampled for
+        touching cells.
+        increase the step size behind > 1 will result in faster processing
+        but less certain results. step size < 1 make no sense.
+        default step size is 1.
+
+    output:
+      dei_touch: a dictionary that for each basin states
+        which other basins are touching.
+
+    description:
+      algorithm to extract the touching basins from a cell basin numpy array.
+      algorithm inspired by C=64 computer games with sprit collision.
+    """
+
+    # detect neighbors
+    eti_collision = set()
+    ai_evolve = ai_basin.copy()
+    for _ in range(-1, i_border_width, i_step_size):
+        # detect cell border collision
+        eti_collision = eti_collision.union(
+            collision(ai_basin=ai_evolve, i_step_size=i_step_size)
+        )
+        # grow basin
+        ai_evolve = grow(ai_basin=ai_evolve, i_step=i_step_size)
+
+    # transform set of tuple of alice and bob collision to dictionary of sets
+    dei_touch = {}
+    ei_alice = set(np.ndarray.flatten(ai_basin))
+    ei_alice.remove(0)
+    for i_alice in ei_alice:
+        dei_touch.update({i_alice : set()})
+    for i_alice, i_bob in eti_collision:
+        ei_bob = dei_touch[i_alice]
+        ei_bob.add(i_bob)
+        dei_touch.update({i_alice : ei_bob})
+
+    # output
+    return(dei_touch)
+
+
+def detouch2df(deo_abc, ls_column=["cell_center","cell_touch"]):
+    """
+    input:
+        deo_touch: touching_cells generated dictionary
+        ls_column: future dictionary_key dictionary_value column name
+
+    output:
+        df_touch: dataframe which contains the same information
+          as the input deo_touch dictionary.
+
+    description:
+        transforms dei_touch dictionary into a two column dataframe.
+    """
+    lo_key_total= []
+    lo_value_total = []
+    for o_key, eo_value in deo_abc.items():
+        try:
+            lo_value = sorted(eo_value, key=int)
+        except ValueError:
+            lo_value = sorted(eo_value)
+        # extract form dictionary
+        if (len(lo_value) == 0):
+            lo_key_total.append(o_key)
+            lo_value_total.append(0)
+        else:
+            lo_key_total.extend([o_key] * len(lo_value))
+            lo_value_total.extend(lo_value)
+    # generate datafarme
+    df_touch = pd.DataFrame([lo_key_total,lo_value_total], index=ls_column).T
+    return(df_touch)
+
+
+def imgfuse(laaai_in):
+    """
+    input:
+        laaai_in: list of 3 channel (RGB) images
+
+    output:
+       aaai_out: fused 3 channel image
+
+    description:
+       code to fuse many RGB images into one.
+    """
+    # check shape
+    ti_shape = None
+    for aaai_in in laaai_in:
+        if (ti_shape is None):
+            ti_shape = aaai_in.shape
+        else:
+           if (aaai_in.shape != ti_shape):
+               sys.exit(f"Error: input images have not the same shape. {aaai_in.shape} != {aaai_in}.")
+
+    # fuse images
+    llli_channel = []
+    for i_channel in range(ti_shape[0]):
+        lli_matrix = []
+        for i_y in range(ti_shape[1]):
+            li_row = []
+            for i_x in range(ti_shape[2]):
+                #print(f"{i_channel} {i_y} {i_x}")
+                li_px = []
+                for aaai_in in laaai_in:
+                    i_in = aaai_in[i_channel,i_y,i_x]
+                    if (i_in != 0):
+                        li_px.append(i_in)
+                if (len(li_px) != 0):
+                    i_out = np.mean(li_px)
+                else:
+                    i_out = 0
+                li_row.append(int(i_out))
+            lli_matrix.append(li_row)
+        llli_channel.append(lli_matrix)
+
+    # output
+    aaai_out = np.array(llli_channel)
+    return(aaai_out)
+
+
+
+# test code: only runs when this module is executed as a script
+if __name__ == "__main__":
+
+    # load basins tiff into numpy array
+    # (disabled: the code is kept inside a string literal)
+    '''
+    import matplotlib.pyplot as plt
+    import skimage as ski
+    a_tiff = ski.io.imread("cells_basins.tif")
+    plt.imshow(a_tiff)
+    '''
+
+    # generate test data
+    # a: four basins (labels 1 to 4) separated by zero valued background
+    a = np.array([
+        [0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,4,0,0,0],
+        [0,0,0,1,1,1,0,0,0,0,0,0,0,0],
+        [0,0,0,1,1,1,0,0,0,0,0,0,0,0],
+        [0,0,0,1,1,1,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,2,2,2,0,0,0],
+        [0,0,0,0,3,3,3,0,2,2,2,0,0,0],
+        [0,0,0,0,3,3,3,0,2,2,2,0,0,0],
+        [0,0,0,0,3,3,3,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+    ])
+
+    # b: two diagonal basins (labels 1 and 2) directly next to each other
+    # (only referenced by the disabled code below)
+    b = np.array([
+        [0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,1,0,0,0,0,0,0,0],
+        [0,0,0,0,1,2,0,0,0,0,0],
+        [0,0,0,0,0,1,2,0,0,0,0],
+        [0,0,0,0,0,0,0,2,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0,0],
+    ])
+
+    # c: a single two pixel diagonal basin
+    # (only referenced by the disabled code below)
+    c = np.array([
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,1,0,0,0,0,0],
+        [0,0,0,0,0,1,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+        [0,0,0,0,0,0,0,0,0,0],
+    ])
+
+    # run get_border
+    print("\nborderwall_tm")
+    print(a)
+    print(get_border(a))
+    #plt.imshow(get_border(a_tiff))
+
+    # run grow
+    # (disabled: the code is kept inside a string literal.
+    # NOTE(review): grow takes i_step, not i_step_size, so these calls
+    # would need adjusting before being re-enabled.)
+    '''
+    print("\ngrow")
+    print(c)
+    print(grow(c))
+    print(grow(grow(c)))
+    print(grow(c, i_step_size=2))
+    print(b)
+    print(grow(b))
+    print(grow(grow(b)))
+    print(grow(b, i_step_size=2))
+    '''
+
+    # run collision
+    # (disabled: the code is kept inside a string literal)
+    '''
+    print("\ncollision")
+    print(c)
+    print(collision(c))
+    print(b)
+    print(collision(b))
+    print(c)
+    print(collision(c))
+    '''
+
+    # run touching_cells with increasing border width
+    print("\ntouch")
+    #print(a)
+    print(touching_cells(a, i_border_width=0))
+    print(touching_cells(a, i_border_width=1))
+    print(touching_cells(a, i_border_width=2))
+    print(touching_cells(a, i_border_width=3))
+    print(touching_cells(a, i_border_width=4))
+    print(touching_cells(a, i_border_width=4, i_step_size=2))
+    #touching_cells(a_tiff, i_border_width=1)
+
+
+    # img fuse
+    # five 3x3x3 arrays; each has the same 3x3 pattern in some of its
+    # first-axis slices and zeros in the others
+    aaai_1 = np.array([
+        [[1,1,1],[2,2,2],[3,3,3]],
+        [[0,0,0,],[0,0,0],[0,0,0]],
+        [[0,0,0],[0,0,0],[0,0,0]],
+    ])
+    aaai_2 = np.array([
+        [[0,0,0,],[0,0,0],[0,0,0]],
+        [[1,1,1],[2,2,2],[3,3,3]],
+        [[0,0,0],[0,0,0],[0,0,0]],
+    ])
+    aaai_3 = np.array([
+        [[0,0,0,],[0,0,0],[0,0,0]],
+        [[0,0,0],[0,0,0],[0,0,0]],
+        [[1,1,1],[2,2,2],[3,3,3]],
+    ])
+    aaai_4 = np.array([
+        [[1,1,1],[2,2,2],[3,3,3]],
+        [[1,1,1],[2,2,2],[3,3,3]],
+        [[0,0,0],[0,0,0],[0,0,0]],
+    ])
+    aaai_5 = np.array([
+        [[0,0,0,],[0,0,0],[0,0,0]],
+        [[1,1,1],[2,2,2],[3,3,3]],
+        [[1,1,1],[2,2,2],[3,3,3]],
+    ])
+    aaai_out = imgfuse([aaai_1, aaai_2, aaai_3, aaai_4, aaai_5])
+    print("fused 3channel image:\n", aaai_out, type(aaai_out))
diff --git a/mplex_image/metadata.py b/mplex_image/metadata.py
new file mode 100755
index 0000000..4d49424
--- /dev/null
+++ b/mplex_image/metadata.py
@@ -0,0 +1,176 @@
+####
+# title: metadata.py
+#
+# language: Python3.7
+# date: 2020-07-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library using python bioformats to extract image metadata
+####
+
+
+#libraries
+import matplotlib as mpl
+mpl.use('agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import skimage
+import pandas as pd
+import bioformats 
+#import javabridge
+import re
+import shutil
+from itertools import chain, compress
+import matplotlib.ticker as ticker
+from mplex_image import cmif
+
+# mpimage
+#functions
+
+def get_exposure(s_image, s_find="Information\|Image\|Channel\|ExposureTime\<\/Key\>\<Value\>"):
+
+    s_meta = bioformats.get_omexml_metadata(path=s_image)
+    o = bioformats.OMEXML(s_meta)
+    print(o.image().Name)
+    print(o.image().AcquisitionDate)
+
+    li_start = [m.start() for m in re.finditer(s_find, s_meta)]
+    if len(li_start)!=1:
+        print('Error: found wrong number of exposure times')
+
+    ls_exposure = []
+    for i_start in li_start:
+        ls_exposure.append(s_meta[i_start:i_start+200])
+    s_exposure =  ls_exposure[0].strip(s_find)
+    s_exposure = s_exposure[1:s_exposure.find(']')]
+    ls_exposure = s_exposure.split(',')
+    li_exposure = [int(item)/1000000 for item in ls_exposure]
+    return(li_exposure,s_meta)
+
+def get_exposure_sample(s_sample,df_img):
+    """
+    return a dataframe with all exposure times for a sample (slide)
+    """
+    #make dataframe of exposure time metadata
+    df_exposure = pd.DataFrame()
+    ls_image = os.listdir()
+    df_sample = df_img[df_img.index.str.contains(s_sample)]
+    for s_image in df_sample.index:
+                        print(s_image)
+                        li_exposure, s_meta = get_exposure(s_image)
+                        se_times = pd.Series(li_exposure,name=s_image)
+                        df_exposure = df_exposure.append(se_times)
+    return(df_exposure)
+
+def get_meta(s_image, s_find = 'Scene\|CenterPosition\<\/Key\>\<Value\>\['):
+    """czi scene metadata
+    s_image = filename
+    s_find = string to find in the omexml metadata
+    returns: 
+    ls_exposure = list of 200 character strings following s_find in metadata
+    s_meta = the whole metadata string
+    """
+    s_meta = bioformats.get_omexml_metadata(path=s_image)
+    o = bioformats.OMEXML(s_meta)
+    #print(o.image().Name)
+    #print(o.image().AcquisitionDate)
+
+    li_start = [m.start() for m in re.finditer(s_find, s_meta)]
+    if len(li_start)!=1:
+        print('Error: found wrong number of exposure times')
+
+    ls_exposure = []
+    for i_start in li_start:
+        ls_exposure.append(s_meta[i_start:i_start+200])
+    s_exposure =  ls_exposure[0].strip(s_find)
+    s_exposure = s_exposure[0:s_exposure.find(']')]
+    ls_exposure = s_exposure.split(',')
+    #li_exposure = [int(item)/1000000 for item in ls_exposure]
+    return(ls_exposure,s_meta)
+
+def scene_position(czidir,type):
+    """
+    get a dataframe of scene positions for each round/scene in TMA
+    """
+    os.chdir(f'{czidir}')
+    df_img = cmif.parse_czi('.',type=type)
+
+    #javabridge.start_vm(class_path=bioformats.JARS)
+    for s_image in df_img.index:
+        print(s_image)
+        ls_exposure,s_meta = get_meta(s_image)
+        df_img.loc[s_image,'Scene_X'] = ls_exposure[0]
+        df_img.loc[s_image,'Scene_Y'] = ls_exposure[1]
+
+    #javabridge.kill_vm()
+
+    df_img = df_img.sort_values(['rounds','scanID','scene']).drop('data',axis=1)
+    return(df_img)
+
+
+    ls_exposure,s_meta = get_meta(s_image, s_find = 'Scene\|CenterPosition\<\/Key\>\<Value\>\[')
+
+def exposure_times_scenes(df_img,codedir,czidir,s_end='.czi'):
+    """
+    get a csv of exposure times for each slide
+    """
+    #go to directory
+    os.chdir(czidir)
+    #export exposure time
+    s_test = sorted(compress(os.listdir(),[item.find(s_end) > -1 for item in os.listdir()]))[1]#[0]
+    s_find = f"{s_test.split('-Scene-')[1].split(s_end)[0]}"
+    for s_sample in sorted(set(df_img.slide)):
+        print(s_sample)
+        df_img_slide = df_img[(df_img.slide==s_sample) & (df_img.scene==s_find)]
+        print(len(df_img_slide))
+        df_exp = get_exposure_sample(s_sample,df_img_slide)
+        df_exp.to_csv(f'{codedir}/{s_sample}_ExposureTimes.csv',header=True,index=True)
+
+def exposure_times(df_img,codedir,czidir):
+    """
+    get a csv of exposure times for each slide
+    """
+    #go to directory
+    os.chdir(czidir)
+    print(czidir)
+    #export exposure time
+    for s_sample in sorted(set(df_img.slide)):
+        df_img_slide = df_img[df_img.slide==s_sample]
+        df_exp = get_exposure_sample(s_sample,df_img_slide)
+        df_exp.to_csv(f'{codedir}/{s_sample}_ExposureTimes.csv',header=True,index=True)
+    #close java virtual machine
+    #javabridge.kill_vm()
+
+def exposure_times_slide(df_img,codedir,czidir):
+    if len(df_img.scene.unique()) == 1:
+        exposure_times(df_img,codedir,czidir)
+    elif len(df_img.scene.unique()) > 1:
+        exposure_times_scenes(df_img,codedir,czidir,s_end='.czi')
+
+def export_tiffs(df_img, s_sample,tiffdir):
+    """
+    export the tiffs of each tile
+
+    df_img = dataframe with a 'slide' column, indexed by image path
+    s_sample = slide whose images are exported
+    tiffdir = directory the tiff file is written to
+    returns (a_test, aa_test, img): the written channel, the whole
+      16 bit array and the image as loaded
+
+    NOTE(review): because of the two break statements only the first
+    channel of the first image of the slide is written; this looks like
+    left over test code - confirm before relying on a full export.
+    if no image belongs to s_sample (or the image has no channel) the
+    statements after the loops fail on unbound local variables.
+    """
+    #start java virtual machine
+    #javabridge.start_vm(class_path=bioformats.JARS)
+
+    #export tiffs
+    df_img_slide = df_img[df_img.slide==s_sample]
+    for path in df_img_slide.index:
+        print(path)
+        img = bioformats.load_image(path) #looks like it only loads the first tile
+        # scale to the 16 bit range
+        # (presumably load_image delivers values between 0 and 1 - TODO confirm)
+        img_new = img*65535
+        img_16 = img_new.astype(np.uint16)
+        # channels are indexed on the third axis
+        i_channels = img_16.shape[2]
+        for i_channel in range(i_channels):
+           print(f'channel {i_channel}')
+           bioformats.write_image(f'{tiffdir}/{path.split(".czi")[0]}_c{str(i_channel+1)}_ORG.tif', pixels=img_16[:,:,i_channel],pixel_type='uint16')
+           # stop after the first channel
+           break
+        # stop after the first image
+        break
+    # hand back what was processed last for inspection
+    a_test = img_16[:,:,i_channel]
+    aa_test = img_16
+    #javabridge.kill_vm()
+    return(a_test,aa_test, img)
diff --git a/mplex_image/mics.py b/mplex_image/mics.py
new file mode 100755
index 0000000..d16b479
--- /dev/null
+++ b/mplex_image/mics.py
@@ -0,0 +1,581 @@
+# wrapper functions for codex image processing
+
+from mplex_image import preprocess, mpimage, getdata, process, analyze, cmif, features, ometiff
+import os
+import pandas as pd
+import math
+import skimage
+from skimage import io, filters
+import re
+import numpy as np
+import json
+from skimage.util import img_as_uint
+import tifffile
+
+def parse_processed():
+    '''
+    parse the file names of processed Macsima images
+
+    builds df_img with mpimage.filename_dataframe(s_end="ome.tif", s_start='R', s_split='___')
+    (presumably from the files in the current working directory -- verify in mpimage),
+    rewrites DAPI / autofluorescence / empty names in df_img.data so that they
+    contain a '__' separator, then splits df_img.data into the columns
+    marker, cycle, rounds, clone, exposure (int), channel, color, rack, slide, scene
+    returns df_img
+    raises KeyError if a parsed channel is not one of DAPI, FITC, PE, APC
+    '''
+    df_img = mpimage.filename_dataframe(s_end ="ome.tif",s_start='R',s_split='___')
+    # NOTE(review): every d_replace below maps an index value (file name) to its
+    # rewritten form and is applied to the `data` column, so it only has an effect
+    # where a `data` value equals the file name -- confirm against filename_dataframe
+    #standardize dapi naming
+    ls_dapi_index = df_img[df_img.index.str.contains('DAPI')].index.tolist()
+    d_replace = dict(zip(ls_dapi_index, [item.replace('DAPIV0','DAPI__DAPIV0') for item in ls_dapi_index]))
+    df_img['data'] = df_img.data.replace(d_replace)
+    #standardize AF naming
+    # (ls_dapi_index is reused as a generic "matching file names" list from here on)
+    ls_dapi_index = df_img[df_img.index.str.contains('autofluorescence')].index.tolist()
+    d_replace = dict(zip(ls_dapi_index, [item.replace('autofluorescence_FITC','autofluorescence-FITC__FITC') for item in ls_dapi_index]))
+    df_img['data'] = df_img.data.replace(d_replace)
+    d_replace = dict(zip(ls_dapi_index, [item.replace('autofluorescence_PE','autofluorescence-PE__PE') for item in ls_dapi_index]))
+    df_img['data'] = df_img.data.replace(d_replace)
+    #standardize empty naming
+    ls_dapi_index = df_img[df_img.index.str.contains('empty')].index.tolist()
+    d_replace = dict(zip(ls_dapi_index, [item.replace('empty','empty__empty') for item in ls_dapi_index]))
+    df_img['data'] = df_img.data.replace(d_replace)
+    # cycle is the 4th '_'-separated token; marker is the text after the last
+    # '<cycle>_' up to the next '__'
+    df_img['marker'] = [item.split(f"{item.split('_')[3]}_")[-1].split('__')[0] for item in df_img.data]
+    df_img['cycle'] = [item.split('_')[3] for item in df_img.data]
+    df_img['rounds'] = [item.split('_')[3].replace('C-','R') for item in df_img.data]
+    # clone: segment after the first '__', up to its first '.'
+    df_img['clone'] = [item.split('__')[1].split('.')[0] for item in df_img.data]
+    #standardize marker naming
+    # map each marker to itself with '_' replaced by '-', then substitute that
+    # form for every occurrence of the marker text inside data
+    d_replace = dict(zip(df_img.marker.tolist(),[item.replace('_','-') for item in df_img.marker.tolist()]))
+    df_img['data'] =  [item.replace(f'''{item.split(f"{item.split('_')[3]}_")[-1].split('__')[0]}''',f'''{d_replace[item.split(f"{item.split('_')[3]}_")[-1].split('__')[0]]}''') for item in df_img.data]
+    # within the segment after the first '__': the second '_' token (up to '.') is
+    # the exposure, and the part after the '.' in the first '_' token is the channel
+    df_img['exposure'] = [int(item.split('__')[1].split('_')[1].split('.')[0]) for item in df_img.data]
+    df_img['channel'] = [item.split('__')[1].split('_')[0].split('.')[1] for item in df_img.data]
+    # channel name -> color code; an unknown channel raises KeyError
+    d_replace = {'DAPI':'c1', 'FITC':'c2', 'PE':'c3', 'APC':'c4'}
+    df_img['color'] = [item.replace(item, d_replace[item]) for item in df_img.channel]
+    # first three '_' tokens of data
+    df_img['rack'] = [item.split('_')[0] for item in df_img.data]
+    df_img['slide'] = [item.split('_')[1] for item in df_img.data]
+    df_img['scene'] = [item.split('_')[2] for item in df_img.data]
+    return(df_img)
+
+def parse_org():
+    '''
+    parse the file names of copied (name-stadardized) Macsima images
+    '''
+    s_path = os.getcwd()
+    df_img = mpimage.filename_dataframe(s_end ="tif",s_start='R',s_split='___')
+    df_img['marker'] = [item.split(f"{item.split('_')[3]}_")[-1].split('__')[0] for item in df_img.data]
+    df_img['cycle'] = [item.split('_')[3] for item in df_img.data]
+    df_img['rounds'] = [item.split('_')[3].replace('C-','R') for item in df_img.data]
+    df_img['clone'] = [item.split('__')[1].split('.')[0] for item in df_img.data]
+    df_img['exposure'] = [int(item.split('__')[1].split('_')[1].split('.')[0]) for item in df_img.data]
+    df_img['channel'] = [item.split('__')[1].split('_')[0].split('.')[1] for item in df_img.data]
+    d_replace = {'DAPI':'c1', 'FITC':'c2', 'PE':'c3', 'APC':'c4'}
+    df_img['color'] = [item.replace(item, d_replace[item]) for item in df_img.channel]
+    df_img['rack'] = [item.split('_')[0] for item in df_img.data]
+    df_img['slide'] = [item.split('_')[1] for item in df_img.data]
+    df_img['scene'] = [item.split('_')[2] for item in df_img.data]
+    df_img['slide_scene'] = df_img.slide + '_' + df_img.scene
+    df_img['path'] = [f"{s_path}/{item}" for item in df_img.index]
+    return(df_img)
+
+def copy_processed(df_img,regdir,i_lines=32639):
+    '''
+    copy the highest exposure time images for processing
+    '''
+    for s_marker in sorted(set(df_img.marker) - {'DAPI','autofluorescence','empty'}):
+        df_marker = df_img[df_img.marker==s_marker]
+        for s_cycle in sorted(set(df_marker.cycle)):
+            for s_index in df_marker[df_marker.cycle==s_cycle].sort_values('exposure',ascending=False).index.tolist():
+                a_img = io.imread(s_index)
+                s_dir_new = s_index.split(f"_{df_img.loc[s_index,'cycle']}")[0]
+                s_index_new = df_img.loc[s_index,'data'].split('.ome.tif')[0]
+                preprocess.cmif_mkdir([f'{regdir}/{s_dir_new}'])
+                print(a_img.max())
+                #get rid of lines
+                a_img[a_img==i_lines] = a_img.min()
+                if a_img.max() < 65535:
+                    io.imsave(f'{regdir}/{s_dir_new}/{s_index_new}.tif',a_img,plugin='tifffile',check_contrast=False)
+                    break
+                else:
+                    print('Try lower exposure time')
+    for s_index in df_img[df_img.marker=='DAPI'].index.tolist():
+        a_img = io.imread(s_index)
+        print(f'DAPI max: {a_img.max()}')
+        if df_img.loc[s_index,'rounds'] != 'R0': #keep lines in R0 dapi, for segmentation
+            a_img[a_img==i_lines] = a_img.min()
+        s_dir_new = s_index.split(f"_{df_img.loc[s_index,'cycle']}")[0]
+        s_index_new = df_img.loc[s_index,'data'].split('.ome.tif')[0]
+        preprocess.cmif_mkdir([f'{regdir}/{s_dir_new}'])
+        io.imsave(f'{regdir}/{s_dir_new}/{s_index_new}.tif',a_img,plugin='tifffile',check_contrast=False)
+
+def extract_cellpose_features(s_sample, segdir, regdir, ls_seg_markers, nuc_diam, cell_diam):
+    '''
+    load the segmentation results, the input images, and the channels images
+    extract mean intensity from each image, and centroid, area and eccentricity for 
+    '''
+    df_sample = pd.DataFrame()
+    df_thresh = pd.DataFrame()
+    os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+    ls_scene = []
+    d_match = {}
+    for s_file in os.listdir():
+        if s_file.find(f'{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins')>-1:
+            ls_scene.append(s_file.split(f'_{".".join(ls_seg_markers)}')[0])
+            d_match.update({s_file.split(f'_{".".join(ls_seg_markers)}')[0]:s_file})
+    for s_scene in ls_scene:
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+        print(f'processing {s_scene}')
+        for s_file in os.listdir():
+            if s_file.find(s_scene) > -1:
+                if s_file.find("DAPI.png") > -1:
+                    s_dapi = s_file
+        dapi = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{s_dapi}')
+        print(f'loading {s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        labels = io.imread(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        cell_labels = io.imread(f'{segdir}/{s_sample}Cellpose_Segmentation/{d_match[s_scene]}')
+        print(f'loading {d_match[s_scene]}')
+        #nuclear features
+        df_feat = features.extract_feat(labels,dapi, properties=(['label']))
+        df_feat.columns = [f'{item}_segmented-nuclei' for item in df_feat.columns]
+        df_feat.index = [f'{s_sample}_cell{item}' for item in df_feat.loc[:,'label_segmented-nuclei']]
+
+        #get subcellular regions
+        cyto = features.label_difference(labels,cell_labels)
+        d_loc_nuc = features.subcellular_regions(labels, distance_short=2, distance_long=5)
+        d_loc_cell = features.subcellular_regions(cell_labels, distance_short=2, distance_long=5)
+        d_loc = {'nuclei':labels,'cell':cell_labels,'cytoplasm':cyto,
+         'nucmem':d_loc_nuc['membrane'][0],'cellmem':d_loc_cell['membrane'][0],
+         'perinuc5':d_loc_nuc['ring'][1],'exp5':d_loc_nuc['grown'][1],
+         'nucadj2':d_loc_nuc['straddle'][0],'celladj2':d_loc_cell['straddle'][0]}
+
+        #subdir organized by slide or scene
+        if os.path.exists(f'{regdir}/{s_sample}'):
+            os.chdir(f'{regdir}/{s_sample}')
+        elif os.path.exists(f'{regdir}/{s_scene}'):
+            os.chdir(f'{regdir}/{s_scene}')
+        else:
+            os.chdir(f'{regdir}')
+        df_img = parse_org()
+        df_img['round_int'] = [int(re.sub('[^0-9]','', item)) for item in df_img.rounds] 
+        df_img = df_img[df_img.round_int < 90]
+        df_img = df_img.sort_values('round_int')
+        #take into account slide (well)
+        df_scene = df_img[df_img.slide_scene==s_scene]
+        #load each image
+        for s_index in df_scene.index:
+                intensity_image = io.imread(s_index)
+                df_thresh.loc[s_index,'threshold_li'] =  filters.threshold_li(intensity_image)
+                if intensity_image.mean() > 0:
+                    df_thresh.loc[s_index,'threshold_otsu'] = filters.threshold_otsu(intensity_image)
+                    df_thresh.loc[s_index,'threshold_triangle'] = filters.threshold_triangle(intensity_image)
+                s_marker = df_scene.loc[s_index,'marker']
+                print(f'extracting features {s_marker}')
+                if s_marker == 'DAPI':
+                    s_marker = s_marker + f'{df_scene.loc[s_index,"rounds"].split("R")[1]}'
+                for s_loc, a_loc in d_loc.items():
+                    if s_loc == 'nuclei':
+                        df_marker_loc = features.extract_feat(a_loc,intensity_image, properties=(['mean_intensity','centroid','area','eccentricity','label']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_centroid-0',f'{s_marker}_{s_loc}_centroid-1',f'{s_marker}_{s_loc}_area',f'{s_marker}_{s_loc}_eccentricity',f'{s_marker}_{s_loc}_label']
+                    elif s_loc == 'cell':
+                        df_marker_loc = features.extract_feat(a_loc,intensity_image, properties=(['mean_intensity','euler_number','area','eccentricity','label']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_euler',f'{s_marker}_{s_loc}_area',f'{s_marker}_{s_loc}_eccentricity',f'{s_marker}_{s_loc}_label']
+                    else:
+                        df_marker_loc = features.extract_feat(a_loc,intensity_image, properties=(['mean_intensity','label']))
+                        df_marker_loc.columns = [f'{s_marker}_{s_loc}',f'{s_marker}_{s_loc}_label']
+                    #set array ids as index
+                    df_marker_loc.index = df_marker_loc.loc[:,f'{s_marker}_{s_loc}_label']
+                    df_marker_loc.index = [f'{s_sample}_cell{item}' for item in df_marker_loc.index]
+                    df_feat = df_feat.merge(df_marker_loc, left_index=True,right_index=True,how='left',suffixes=('',f'{s_marker}_{s_loc}'))
+        df_sample = df_sample.append(df_feat)
+    return(df_sample, df_thresh)
+
+def combine_labels(s_sample,segdir, subdir, ls_seg_markers, nuc_diam, cell_diam, df_mi_full,s_thresh):
+    '''
+    - load cell labels; delete cells that were not used for cytoplasm (i.e. ecad neg)
+    - nuc labels, expand to perinuc 5 and then cut out the cell labels
+    - keep track of cells that are completely covered by another cell (or two or three: counts as touching).
+
+    s_sample: slide name; selects rows of df_mi_full and names the output files
+    segdir: segmentation folder; `{segdir}/{s_sample}Cellpose_Segmentation` is used if it exists
+    subdir: not used in this function
+    ls_seg_markers, nuc_diam, cell_diam: used to build the label file names
+    df_mi_full: dataframe with `slide`, `scene` and a boolean `{s_thresh}_negative` column,
+        indexed by ids that contain the scene name and end in `_cell<label>`
+    returns (labels, combine, dd_result): nuclei labels and combined labels of the
+        LAST processed scene, and {sample_scene: {cell label: [nuclei labels under it]}}
+    side effects: changes the working directory, writes one combined-label tif per
+        scene and `result_{s_sample}_cellsatop_dictionary.json` into it
+    '''
+    se_neg = df_mi_full[df_mi_full.slide == s_sample].loc[:,f'{s_thresh}_negative']
+    dd_result = {}
+    if os.path.exists(f'{segdir}/{s_sample}Cellpose_Segmentation'):
+        os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+    else:
+        os.chdir(segdir)
+    # scenes = files with a ' - DAPI.png' suffix that also occur in df_mi_full for this slide
+    ls_scene = []
+    for s_file in os.listdir():
+        if s_file.find(' - DAPI.png') > -1:
+            ls_scene.append(s_file.split(' - DAPI.png')[0])
+    ls_scene = sorted(set(df_mi_full[df_mi_full.slide == s_sample].scene) & set(ls_scene))
+    for s_scene in ls_scene:
+        # NOTE(review): str.contains treats s_scene as a regular expression by default
+        se_neg_scene = se_neg[se_neg.index.str.contains(s_scene)]
+
+        print(f'Processing combined segmentaiton labels for {s_scene}')
+        # NOTE(review): when a label file is missing only a message is printed; the code
+        # below then uses `labels` / `cell_labels` from the previous scene, or raises
+        # NameError on the first scene
+        if os.path.exists(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif'):
+            labels = io.imread(f'{s_scene} nuclei{nuc_diam} - Nuclei Segmentation Basins.tif')
+        else:
+            print('no nuclei labels found')
+        # three naming variants of the cell-label file are tried in turn
+        if os.path.exists(f'{s_scene} matchedcell{cell_diam} - Cell Segmentation Basins.tif'):
+            cell_labels = io.imread(f'{s_scene} matchedcell{cell_diam} - Cell Segmentation Basins.tif')
+        elif os.path.exists(f'{s_scene}_{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins.tif'):
+            cell_labels = io.imread(f'{s_scene}_{".".join(ls_seg_markers)} matchedcell{cell_diam} - Cell Segmentation Basins.tif')
+        elif os.path.exists(f'{s_scene}_{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins.tif'):
+            cell_labels = io.imread(f'{s_scene}_{".".join(ls_seg_markers)} nuc{nuc_diam} matchedcell{cell_diam} - Cell Segmentation Basins.tif')
+        else:
+            print('no cell labels found')
+        #set non-ecad cell labels to zero
+        # label numbers of the cells flagged True in the negative column
+        a_zeros = np.array([int(item.split('_cell')[1]) for item in se_neg_scene[se_neg_scene].index]).astype('int64')
+        mask = np.isin(cell_labels, a_zeros)
+        cell_labels_copy = cell_labels.copy()
+        cell_labels_copy[mask] = 0
+        #make the nuclei under cells zero
+        labels_copy = labels.copy()  # not used below
+        distance = 5
+        perinuc5, labels_exp = features.expand_label(labels,distance=distance)  # perinuc5 is not used below
+        labels_exp[cell_labels_copy > 0] = 0
+        #combine calls and expanded nuclei
+        # the two arrays no longer overlap, so the sum keeps each pixel's single label
+        combine = (labels_exp + cell_labels_copy)
+        # output name depends on whether the scene name starts with 'Scene'
+        if s_scene.find('Scene') == 0:
+            io.imsave(f'{s_sample}_{s_scene.replace("Scene ","scene")}_cell{cell_diam}_nuc{nuc_diam}_CombinedSegmentationBasins.tif',combine)
+        else:
+            io.imsave(f'{s_scene}_{".".join(ls_seg_markers)}-cell{cell_diam}_exp{distance}_CellSegmentationBasins.tif',combine)
+        #figure out the covered cells...labels + combined
+        # pixels that are non-zero in both the nuclei labels and the combined labels
+        not_zero_pixels =  np.array([labels.ravel() !=0,combine.ravel() !=0]).all(axis=0)
+        a_tups = np.array([combine.ravel()[not_zero_pixels],labels.ravel()[not_zero_pixels]]).T #combined over nuclei
+        unique_rows = np.unique(a_tups, axis=0)
+        # key = combined label, value = nucleus label found under it; pairs with
+        # differing ids mean the nucleus lies under another cell's label
+        new_dict = {}
+        for key, value in unique_rows:
+            if key == value:
+                continue
+            else:
+                if key in new_dict:
+                    new_dict[key].append(value)
+                else:
+                    new_dict[key] = [value]
+        #from elmar (reformat cells touching dictionary and save
+        # labels are converted to str so the dictionary can be written as json
+        d_result = {}
+        for i_cell, li_touch in new_dict.items():
+            d_result.update({str(i_cell): [str(i_touch) for i_touch in li_touch]})
+        dd_result.update({f'{s_sample}_{s_scene.replace("Scene ","scene")}':d_result})
+    #save dd_touch as json file
+    with open(f'result_{s_sample}_cellsatop_dictionary.json','w') as f: 
+        json.dump(dd_result, f)
+    print('')
+    # NOTE(review): labels and combine are undefined if ls_scene was empty
+    return(labels,combine,dd_result)
+
+def cropped_ometiff(s_sample,subdir,cropdir,d_crop,d_combos,s_dapi,tu_dim):
+    if os.path.exists(f'{subdir}/{s_sample}'):
+        os.chdir(f'{subdir}/{s_sample}')
+    df_img = parse_org()
+    df_img['scene'] = s_sample
+    d_crop_scene = {s_sample:d_crop[s_sample]}
+    dd_result = mpimage.overlay_crop(d_combos,d_crop_scene,df_img,s_dapi,tu_dim)
+    for s_crop, d_result in dd_result.items():
+        for s_type, (ls_marker, array) in d_result.items():
+            print(f'Generating multi-page ome-tiff {[item for item in ls_marker]}')
+            new_array = array[np.newaxis,np.newaxis,:]
+            s_xml =  ometiff.gen_xml(new_array, ls_marker)
+            with tifffile.TiffWriter(f'{cropdir}/{s_crop}_{s_type}.ome.tif') as tif:
+                tif.save(new_array,  photometric = "minisblack", description=s_xml, metadata = None)
+
+
+#old
+def convert_dapi(debugdir,regdir,b_mkdir=True):
+    '''
+    convert dapi to tif, rename to match Guillaumes pipeline requirements
+    '''
+    cwd = os.getcwd()
+    os.chdir(debugdir)
+    for s_dir in sorted(os.listdir()):
+        if s_dir.find('R-1_')== 0:
+            os.chdir(s_dir)
+            for s_file in sorted(os.listdir()):
+                if s_file.find('bleach')==-1:
+                    s_round = s_file.split("Cycle(")[1].split(").ome.tif")[0]
+                    print(f'stain {s_round}')
+                    s_dir_new = s_dir.split('_')[2] + '-Scene-0' + s_dir.split('F-')[1]
+                    s_tissue_dir = s_dir.split('_F-')[0]
+                    if b_mkdir:
+                        preprocess.cmif_mkdir([f'{regdir}/{s_tissue_dir}'])
+                    a_dapi = skimage.io.imread(s_file)
+                    #rename with standard name (no stain !!!!)
+                    with skimage.external.tifffile.TiffWriter(f'{regdir}/{s_tissue_dir}/{s_dir_new}_R{s_round}_DAPI_V0_c1_ORG_5.0.tif') as tif:
+                        tif.save(a_dapi)
+            os.chdir('..')
+    os.chdir(cwd)
+
+def convert_channels(processdir, regdir, b_rename=True, testbool=True):
+    '''
+    convert channels to tif, select one exposure time of three, rename to match Guillaumes pipeline requirements
+    '''
+    cwd = os.getcwd()
+    os.chdir(processdir)
+    for s_dir in sorted(os.listdir()):
+        if s_dir.find('R-1_')== 0:
+            os.chdir(s_dir)
+            if b_rename:
+                d_rename = {'autofluorescencePE_P':'autofluorescencePE_V0_P',
+                'autofluorescenceFITC_F':'autofluorescenceFITC_V0_F',
+                '000_DAPIi':'extra000_DAPIi',
+                '000_DAPIf':'extra000_DAPIf',
+                'extraextraextra':'extra',
+                'extraextra':'extra',
+                '_FITC_':'_c2_ORG_',
+                '_PE_':'_c3_ORG_',}
+                preprocess.dchange_fname(d_rename,b_test=testbool)
+                
+            #parse file names
+            else:
+                ls_column = ['rounds','marker','dilution','fluor','ORG','exposure','expdecimal','imagetype1','imagetype']
+                df_img = mpimage.parse_img(s_end =".tif",s_start='0',s_sep1='_',s_sep2='.',ls_column=ls_column,b_test=False)
+                df_img['exposure'] = df_img.exposure.astype(dtype='int')
+                ls_marker = sorted(set(df_img.marker))
+                for s_marker in ls_marker:
+                    df_marker = df_img[df_img.marker==s_marker]
+                    df_sort = df_marker.sort_values(by=['exposure'],ascending=False,inplace=False)
+                    for idx in range(len(df_sort.index)):
+                        s_index = df_sort.index[idx]
+                        a_img = skimage.io.imread(s_index)
+                        df_file = df_sort.loc[s_index,:]
+                        print(a_img.max())
+                        if idx < len(df_sort.index) - 1:
+                            if a_img.max() < 65535:
+                                print(f'Selected {df_file.exposure} for {df_file.marker}')
+                                s_dir_new = s_dir.split('_')[2] + '-Scene-0' + s_dir.split('F-')[1]
+                                s_tissue_dir = s_dir.split('_F-')[0]
+                                s_index_new = s_index.split(".ome.tif")[0]
+                                with skimage.external.tifffile.TiffWriter(f'{regdir}/{s_tissue_dir}/{s_dir_new}_R{s_index_new}.tif') as tif:
+                                    tif.save(a_img)
+                                break
+                            else:
+                                print('Try lower exposure time')
+                        elif idx == len(df_sort.index) - 1:
+                            print(f'Selected as the lowest exposure time {df_file.exposure} for {df_file.marker}')
+                            s_dir_new = s_dir.split('_')[2] + '-Scene-0' + s_dir.split('F-')[1]
+                            s_tissue_dir = s_dir.split('_F-')[0]
+                            s_index_new = s_index.split(".ome.tif")[0]
+                            with skimage.external.tifffile.TiffWriter(f'{regdir}/{s_tissue_dir}/{s_dir_new}_R{s_index_new}.tif') as tif:
+                                tif.save(a_img)
+                        else:
+                            print('/n /n /n /n Error in finding exposure time')
+        
+            os.chdir('..')
+
+def parse_converted(regdir):
+        '''
+        parse the converted miltenyi file names,
+        regdir contains the images
+        '''
+        s_dir = os.getcwd()
+        df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='G',s_split='_')
+        df_img.rename({'data':'scene'},axis=1,inplace=True)
+        df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+        df_img['marker'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+        df_img['dilution'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+        df_img['color'] = [item[4] for item in [item.split('_') for item in df_img.index]]
+        df_img['scene_int'] = [item.split('Scene-')[1] for item in df_img.scene]
+        df_img['scene_int'] = df_img.scene_int.astype(dtype='int')
+        df_img['exposure'] = [item[6].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+        df_img['path'] = [f'{regdir}/{s_dir}/{item}' for item in df_img.index]
+        df_img['tissue'] = s_dir
+        return(df_img)
+
+def parse_converted_dirs(regdir):
+    '''
+    parse the converted miltenyi file names,
+    regdir is the master folder containing subfolders with ROIs/gates
+    '''
+    os.chdir(regdir)
+    df_img_all = pd.DataFrame()
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        os.chdir(s_dir)
+        s_sample = s_dir
+        print(s_sample)
+        df_img = parse_converted(s_dir)
+        df_img_all = df_img_all.append(df_img)
+        os.chdir('..')
+    return(df_img_all)
+
+def count_images(df_img,b_tile_count=True):
+    """
+    count and list slides, scenes, rounds
+    """
+    df_count = pd.DataFrame(index=sorted(set(df_img.scene)),columns=sorted(set(df_img.color)))
+    for s_sample in sorted(set(df_img.tissue)):
+        print(f'ROI {s_sample}')
+        df_img_slide = df_img[df_img.tissue==s_sample]
+        print('tiles')
+        [print(item) for item in sorted(set(df_img_slide.scene))]
+        print(f'Number of images = {len(df_img_slide)}')
+        print(f'Rounds:')
+        [print(item) for item in sorted(set(df_img_slide.rounds))]
+        print('\n')
+        if b_tile_count:
+            for s_scene in sorted(set(df_img_slide.scene)):
+                df_img_scene = df_img_slide[df_img_slide.scene==s_scene]
+                for s_color in sorted(set(df_img_scene.color)):
+                    print(f'{s_scene} {s_color} {len(df_img_scene[df_img_scene.color==s_color])}')
+                    df_count.loc[s_scene,s_color] = len(df_img_scene[df_img_scene.color==s_color])
+    return(df_count)
+
+def visualize_reg_images(regdir,qcdir,color='c1',tu_array=(3,2)):
+    """
+    array registered images to check tissue identity, focus, etc.
+    """
+    #check registration
+    preprocess.cmif_mkdir([f'{qcdir}/RegisteredImages'])
+    cwd = os.getcwd()
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        os.chdir(s_dir)
+        s_sample = s_dir
+        print(s_sample)
+        df_img = parse_converted(s_dir)
+        ls_scene = sorted(set(df_img.scene))
+        for s_scene in ls_scene:
+            print(s_scene)
+            df_img_scene = df_img[df_img.scene == s_scene]
+            df_img_stain = df_img_scene[df_img_scene.color==color]
+            df_img_sort = df_img_stain.sort_values(['rounds'])
+            i_sqrt = math.ceil(math.sqrt(len(df_img_sort)))
+            #array_img(df_img,s_xlabel='color',ls_ylabel=['rounds','exposure'],s_title='marker',tu_array=(2,4),tu_fig=(10,20))
+            if color == 'c1':
+                fig = mpimage.array_img(df_img_sort,s_xlabel='marker',ls_ylabel=['rounds','exposure'],s_title='rounds',tu_array=tu_array,tu_fig=(16,14))
+            else:
+                fig = mpimage.array_img(df_img_sort,s_xlabel='color',ls_ylabel=['rounds','exposure'],s_title='marker',tu_array=tu_array,tu_fig=(16,12))
+            fig.savefig(f'{qcdir}/RegisteredImages/{s_scene}_registered_{color}.png')
+        os.chdir('..')
+    os.chdir(cwd)
+    #return(df_img)
+
+def rename_files(d_rename,dir,b_test=True):
+    """
+    change file names
+    """
+    cwd = os.getcwd()
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{dir}/{s_dir}'
+        os.chdir(s_path)
+        print(s_dir)
+        df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='reg',s_split='_')
+        df_img.rename({'data':'scene'},axis=1,inplace=True)
+        df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+        df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+        df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+        if b_test:
+            print('This is a test')
+            preprocess.dchange_fname(d_rename,b_test=True)
+        elif b_test==False:
+            print('Changing name - not a test')
+            preprocess.dchange_fname(d_rename,b_test=False)
+        else:
+            pass
+
+def rename_fileorder(s_sample, dir, b_test=True):
+    """
+    change file names
+    """
+    cwd = os.getcwd()
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{dir}/{s_dir}'
+        os.chdir(s_path)
+        print(s_dir)
+        df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='Scene',s_split='_')
+        df_img.rename({'data':'scene'},axis=1,inplace=True)
+        df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+        df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+        df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+        for s_index in df_img.index:
+            s_round = df_img.loc[s_index,'rounds']
+            s_scene= f"{s_sample}-{df_img.loc[s_index,'scene']}"
+            s_marker = df_img.loc[s_index,'marker']
+            s_color = df_img.loc[s_index,'color']
+            s_index_rename = f'{s_round}_{s_scene}_{s_marker}_{s_color}_ORG.tif'
+            d_rename = {s_index:s_index_rename}
+            if b_test:
+                print('This is a test')
+                preprocess.dchange_fname(d_rename,b_test=True)
+            elif b_test==False:
+                print('Changing name - not a test')
+                preprocess.dchange_fname(d_rename,b_test=False)
+            else:
+                pass
+
+
+def copy_files(dir,dapi_copy, marker_copy,testbool=True,type='codex'):
+    """
+    copy and rename files if needed as dummies
+    need to edit
+    """
+    os.chdir(dir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{dir}/{s_dir}'
+        os.chdir(s_path)
+        #s_sample = s_dir.split('-Scene')[0]
+        df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='Scene',s_split='_')
+        df_img.rename({'data':'scene'},axis=1,inplace=True)
+        df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+        df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+        df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+        print(s_dir)
+        #if b_test:
+        for key, dapi_item in dapi_copy.items():
+                df_dapi = df_img[(df_img.rounds== key.split('_')[1]) & (df_img.color=='c1')]
+                s_dapi = df_dapi.loc[:,'marker'][0]
+                preprocess.copy_dapis(s_r_old=key,s_r_new=f'_cyc{dapi_item}_',s_c_old='_c1_',
+                 s_c_new='_c2_',s_find=f'_c1_{s_dapi}_ORG.tif',b_test=testbool,type=type)
+        i_count=0
+        for idx,(key, item) in enumerate(marker_copy.items()):
+                preprocess.copy_markers(df_img, s_original=key, ls_copy = item,
+                 i_last_round= dapi_item + i_count, b_test=testbool,type=type)
+                i_count=i_count + len(item)
+
+def segmentation_thresholds(regdir,qcdir, d_segment):
+    """
+    visualize binary mask of segmentaiton threholds
+    need to edit
+    """
+    preprocess.cmif_mkdir([f'{qcdir}/Segmentation'])
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='Scene',s_split='_')
+        df_img.rename({'data':'scene'},axis=1,inplace=True)
+        df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+        df_img['color'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+        df_img['marker'] = [item[3].split('.')[0] for item in [item.split('_') for item in df_img.index]]
+        s_sample = s_dir
+        print(s_sample)
+        d_seg = preprocess.check_seg_markers(df_img,d_segment, i_rows=1, t_figsize=(6,6)) #few scenes
+        for key, fig in d_seg.items():
+            fig.savefig(f'{qcdir}/Segmentation/{s_dir}_{key}_segmentation.png')
+
+
+def segmentation_inputs(s_sample,regdir,segdir,d_segment,b_start=False):
+    """
+    make inputs for guillaumes segmentation
+
+    s_sample: sample name used in the metadata file name and for cluster_java
+    regdir: master folder; each entry `{regdir}/{entry}` is entered in sorted order
+    segdir: passed to preprocess.cluster_java as segmentdir
+    d_segment: {marker: minimum}; overrides the default minimum of 1003
+    b_start: if True, change into a hard-coded cluster folder and run `make_sh`
+    writes `RoundsCyclesTable.txt` and `metadata_{s_sample}_RoundsCyclesTable.csv`
+    into every sub-folder
+    """
+    os.chdir(regdir)
+    for idx, s_dir in enumerate(sorted(os.listdir())):
+        s_path = f'{regdir}/{s_dir}'
+        os.chdir(s_path)
+        df_img = mpimage.filename_dataframe(s_end = ".tif",s_start='R',s_split='_')
+        df_img.rename({'data':'rounds'},axis=1,inplace=True)
+        #df_img['rounds'] = [item[1] for item in [item.split('_') for item in df_img.index]]
+        # here the marker is part 2 and the color part 3 of the '_'-separated file name
+        df_img['color'] = [item[3] for item in [item.split('_') for item in df_img.index]]
+        df_img['marker'] = [item[2] for item in [item.split('_') for item in df_img.index]]
+        #s_sample = s_dir
+        #s_sample = s_dir.split('-Scene')[0]
+        print(s_sample)
+        # every non-DAPI (non-c1) image gets one row in the table
+        df_marker = df_img[df_img.color!='c1']
+        df_marker = df_marker.sort_values(['rounds','color'])
+        # NOTE(review): the marker list is wrapped in a second list; confirm the index
+        # pandas builds from it works with the .loc[s_key, ...] assignment below
+        df_dapi = pd.DataFrame(index = [df_marker.marker.tolist()],columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+        df_dapi['rounds'] = df_marker.loc[:,['rounds']].values
+        df_dapi['colors'] = df_marker.loc[:,['color']].values
+        # defaults applied to every marker
+        df_dapi['minimum'] = 1003
+        df_dapi['maximum'] = 65535
+        df_dapi['exposure'] = 100
+        df_dapi['refexp'] = 100
+        df_dapi['location'] = 'All'
+        # per-marker minimum overrides
+        for s_key,i_item in d_segment.items():
+            df_dapi.loc[s_key,'minimum'] = i_item
+        # same table twice: space-separated without header, and csv with header
+        df_dapi.to_csv('RoundsCyclesTable.txt',sep=' ',header=False)
+        df_dapi.to_csv(f'metadata_{s_sample}_RoundsCyclesTable.csv',header=True)
+        #create cluster.java file
+        preprocess.cluster_java(s_dir=f'JE{idx}',s_sample=s_sample,imagedir=f'{s_path}',segmentdir=segdir,type='exacloud',b_segment=True,b_TMA=False)
+        if b_start:
+            # NOTE(review): absolute, site-specific path; only valid on that cluster
+            os.chdir(f'/home/groups/graylab_share/Chin_Lab/ChinData/Work/engje/exacloud/JE{idx}') #exacloud
+            print(f'JE{idx}')
+            os.system('make_sh')
diff --git a/mplex_image/mpimage.py b/mplex_image/mpimage.py
new file mode 100755
index 0000000..86746e4
--- /dev/null
+++ b/mplex_image/mpimage.py
@@ -0,0 +1,817 @@
+####
+# title: mpimage.py
+#
+# language: Python3.6
+# date: 2019-05-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to display, normalize and crop multiplex images
+####
+
+#libraries
+import matplotlib as mpl
+mpl.use('agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import skimage
+import pandas as pd
+#import bioformats 
+import re
+import shutil
+from itertools import chain
+import matplotlib.ticker as ticker
+
+#os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF/')
+#from apeer_ometiff_library import omexmlClass
+
+#functions
+
+
+def parse_img(s_end =".tif",s_start='',s_sep1='_',s_sep2='.',s_exclude='Gandalf',ls_column=['rounds','color','imagetype','scene'],b_test=True):
+    '''
+    required columns: ['rounds','color','imagetype','scene']
+    meta names names=['rounds','color','minimum', 'maximum', 'exposure', 'refexp','location'],#'marker',
+    return = df_img
+    '''
+    ls_file = []
+    for file in os.listdir():
+        #find all filenames ending in s_end
+        if file.endswith(s_end):
+            if file.find(s_start)==0:
+                if file.find(s_exclude)==-1:
+                     ls_file = ls_file + [file]
+        
+    print(f'test {int(1.1)}')
+    #make a list of list of file name items separated by s_sep
+    llls_split = []
+    for items in [item.split(s_sep1)for item in ls_file]:
+        llls_split.append([item.split(s_sep2) for item in items])
+
+    lls_final = []
+    for lls_split in llls_split:
+        lls_final.append(list(chain.from_iterable(lls_split)))
+
+    #make a blank dataframe with the index being the filename 
+    df_img = pd.DataFrame(index=ls_file, columns=ls_column)
+    if b_test:
+        print(lls_final[0])
+        print(f'Length = {len(lls_final[0])}')
+    #add a column for each part of the name
+    else:
+        for fidx, ls_final in enumerate(lls_final):
+            for idx, s_name in enumerate(ls_final):
+                df_img.loc[ls_file[fidx],ls_column[idx]] = s_name
+        print('Mean number of items in file name')
+        print(np.asarray([(len(item)) for item in lls_final]).mean())
+        if (np.asarray([(len(item)) for item in lls_final]).mean()).is_integer()==False:
+            print([(len(item)) for item in lls_final])
+            i_right = np.asarray([(len(item)) for item in lls_final]).max()
+            for fidx, ls_final in enumerate(lls_final):
+                if len(ls_final) < i_right:
+                    print(f' inconsitent name: {ls_file[fidx]}')
+    return(df_img)
+
+def parse_org(s_end = "ORG.tif",s_start='R',type='reg'):
+    """
+    This function will parse images following koei's naming convention
+    Example: Registered-R1_PCNA.CD8.PD1.CK19_Her2B-K157-Scene-002_c1_ORG.tif
+    The output is a dataframe with image filename in index
+    And rounds, color, imagetype, scene (/tissue), and marker in the columns
+    type= 'reg' or 'raw'
+    """
+
+    ls_file = []
+    for file in os.listdir():
+    #find all filenames ending in s_end
+        if file.endswith(s_end):
+            if file.find(s_start)==0:
+                ls_file = ls_file + [file]
+    lls_name = [item.split('_') for item in ls_file]
+    df_img = pd.DataFrame(index=ls_file)
+    if type == 'raw':
+        lls_scene = [item.split('-Scene-') for item in ls_file]
+    elif type== 'noscenes':
+        ls_scene = ['Scene-001'] * len(ls_file)
+    if type == 'raw':
+        df_img['rounds'] = [item[0] for item in lls_name]
+    elif type== 'noscenes':
+        df_img['rounds'] = [item[0] for item in lls_name]
+    else:
+        df_img['rounds'] = [item[0].split('Registered-')[1] for item in lls_name]
+    df_img['color'] = [item[-2] for item in lls_name]
+    df_img['imagetype'] = [item[-1].split('.tif')[0] for item in lls_name]
+    if type == 'raw':
+        df_img['slide'] = [item[2] for item in lls_name]
+        try:
+            df_img['scene'] = [item[1].split('_')[0] for item in lls_scene]
+        except IndexError:
+            print(f"{set([item[0] for item in lls_scene])}")
+    elif type == 'noscenes':
+        df_img['slide'] = [item[2] for item in lls_name]
+        df_img['scene'] = ls_scene
+    else:
+        df_img['scene'] = [item[2] for item in lls_name]
+    df_img['round_ord'] = [re.sub('Q','.5', item) for item in df_img.rounds] 
+    df_img['round_ord'] = [float(re.sub('[^0-9.]','', item)) for item in df_img.round_ord]
+    df_img = df_img.sort_values(['round_ord','rounds','color'])
+    for idx, s_round in enumerate(df_img.rounds.unique()):
+        df_img.loc[df_img.rounds==s_round, 'round_num'] = idx
+    #parse file name for biomarker
+    for s_index in df_img.index:
+        #print(s_index)
+        s_color = df_img.loc[s_index,'color']
+        if s_color == 'c1':
+            s_marker = 'DAPI'
+        elif s_color == 'c2':
+            s_marker = s_index.split('_')[1].split('.')[0]
+        elif s_color == 'c3':
+            s_marker = s_index.split('_')[1].split('.')[1]
+        elif s_color == 'c4':
+            s_marker = s_index.split('_')[1].split('.')[2]
+        elif s_color == 'c5':
+            s_marker = s_index.split('_')[1].split('.')[3]
+        #these are only included in sardana shading corrected images
+        elif s_color == 'c6':
+            s_marker = s_index.split('_')[1].split('.')[2]
+        elif s_color == 'c7':
+            s_marker = s_index.split('_')[1].split('.')[3]
+        else: print('Error')
+        df_img.loc[s_index,'marker'] = s_marker
+
+    return(df_img) #,lls_name)
+
+def filename_dataframe(s_end = ".czi",s_start='R',s_split='_'):
+    '''
+    quick and dirty way to select files for dataframe. 
+    s_end = string at end of file names
+    s_start = string at beginning of filenames
+    s_split = character/string in all file names
+    '''
+    ls_file = []
+    for file in os.listdir():
+    #find all filenames ending in 'ORG.tif'
+        if file.endswith(s_end):
+            if file.find(s_start)==0:
+                ls_file = ls_file + [file]
+    lls_name = [item.split(s_split) for item in ls_file]
+    df_img = pd.DataFrame(index=ls_file)
+    df_img['data'] = [item[0] for item in lls_name]
+    return(df_img)
+
+def underscore_to_dot(s_sample, s_end='ORG.tif', s_start='R',s_split='_'):
+    df = filename_dataframe(s_end,s_start,s_split)
+    ls_old =  sorted(set([item.split(f'_{s_sample}')[0] for item in df.index]))
+    ls_new =  sorted(set([item.split(f'_{s_sample}')[0].replace('_','.').replace(f"{df.loc[item,'data']}.",f"{df.loc[item,'data']}_") for item in df.index]))
+    d_replace = dict(zip(ls_old,ls_new))
+    for key, item in d_replace.items():
+        if key.split('_')[0] != item.split('_')[0]:
+            print(f' Error {key} mathced to {item}')
+    return(d_replace)
+
+def add_exposure(df_img,df_t,type='roundcycles'):
+    """
+    df_img = dataframe of images with columns [ 'color', 'exposure', 'marker','sub_image','sub_exposure']
+            and index with image names
+    df_t = metadata with dataframe with ['marker','exposure']
+    """
+    if type == 'roundscycles':
+        for s_index in df_img.index:
+            s_marker = df_img.loc[s_index,'marker']
+            #look up exposure time for marker in metadata
+            df_t_image = df_t[(df_t.marker==s_marker)]
+            if len(df_t_image) > 0:
+                i_exposure = df_t_image.iloc[0].loc['exposure']
+                df_img.loc[s_index,'exposure'] = i_exposure
+            else:
+                print(f'{s_marker} has no recorded exposure time')
+    elif type == 'czi':
+    #add exposure
+        df_t['rounds'] = [item.split('_')[0] for item in df_t.index]
+        #df_t['tissue'] = [item.split('_')[2].split('-Scene')[0] for item in df_t.index] #not cool with stiched 
+        for s_index in df_img.index:
+            s_tissue = df_img.loc[s_index,'scene'].split('-Scene')[0]
+            s_color = str(int(df_img.loc[s_index,'color'].split('c')[1])-1)
+            s_round = df_img.loc[s_index,'rounds']
+            print(s_index)
+            df_img.loc[s_index,'exposure'] = df_t[(df_t.index.str.contains(s_tissue)) & (df_t.rounds==s_round)].loc[:,s_color][0]
+
+    return(df_img)
+
+def subtract_images(df_img,d_channel={'c2':'L488','c3':'L555','c4':'L647','c5':'L750'},ls_exclude=[],subdir='SubtractedRegisteredImages',b_8bit=True):#b_mkdir=True,
+    """
+    This code loads 16 bit grayscale tiffs, performs AF subtraction of channels/rounds defined by the user, and outputs 8 bit AF subtracted tiffs for visualization.
+    The data required is:
+    1. The RoundsCyclesTable with real exposure times
+    2. dataframe of images to process (df_img); can be created with any custom parsing function
+        df_img = dataframe of images with columns [ 'color', 'exposure', 'marker']
+            and index with image names
+        d_channel = dictionary mapping color to marker to subtract
+        ls_exclude = lost of markers not needing subtraction
+    """
+    #generate dataframe of subtraction markers 
+    es_subtract = set()
+    for s_key, s_value in d_channel.items():
+        es_subtract.add(s_value)
+        print(f'Subtracting {s_value} for all {s_key}')
+    
+    df_subtract = pd.DataFrame()
+    for s_subtract in sorted(es_subtract):
+        se_subtract = df_img[df_img.marker==s_subtract]
+        df_subtract = df_subtract.append(se_subtract)
+    print(f'The background images {df_subtract.index.tolist}')
+    print(f'The background markers {df_subtract.marker.tolist}')
+    
+    #generate dataframe of how subtraction is set up
+    #set of markers minus the subtraction markers 
+    es_markers = set(df_img.marker) - es_subtract
+    #dataframe of markers
+    df_markers = df_img[df_img.loc[:,'marker'].isin(sorted(es_markers))]
+    #minus dapi (color 1 or DAPI)
+    #df_markers = df_markers[df_markers.loc[:,'color']!='c1']
+    #df_markers = df_markers[~df_markers.loc[:,'marker'].str.contains('DAPI')]
+    df_copy = df_img[df_img.marker.isin(ls_exclude)]
+    df_markers = df_markers[~df_markers.marker.isin(ls_exclude)]
+    
+    for s_file in df_copy.index.tolist():
+        print(s_file)
+        #print(f'copied to ./AFSubtracted/{s_file}')
+        #shutil.copyfile(s_file,f'./AFSubtracted/{s_file}')
+        print(f'copied to {subdir}/{s_file}')
+        shutil.copyfile(s_file,f'{subdir}/{s_file}')
+    #ls_scene = sorted(set(df_img.scene))
+    #add columns with mapping of proper subtracted image to dataframe
+    
+    for s_index in df_markers.index.tolist():
+        print('add colums')
+        print(s_index)
+        s_scene = s_index.split('_')[2]
+        s_color = df_markers.loc[s_index,'color']
+        if len(df_subtract[(df_subtract.color==s_color) & (df_subtract.scene==s_scene)])==0:
+            print(f'missing {s_color} in {s_scene}')
+        else:
+            df_markers.loc[s_index,'sub_image'] = df_subtract[(df_subtract.color==s_color) & (df_subtract.scene==s_scene)].index[0]
+            df_markers.loc[s_index,'sub_exposure'] = df_subtract[(df_subtract.color==s_color) & (df_subtract.scene==s_scene)].exposure[0]
+    
+    #loop to subtract
+    for s_index in df_markers.index.tolist():
+        print(f'Processing {s_index}')
+        s_image = s_index
+        s_color = '_' + df_markers.loc[s_index,'color'] + '_'
+        s_background = df_markers.loc[s_index,'sub_image']
+        print(f'From {s_image} subtracting \n {s_background}')
+        a_img = skimage.io.imread(s_image)
+        a_AF = skimage.io.imread(s_background)
+        #divide each image by exposure time
+        #subtract 1 ms AF from 1 ms signal
+        #multiply by original image exposure time
+        a_sub = (a_img/df_markers.loc[s_index,'exposure'] - a_AF/df_markers.loc[s_index,'sub_exposure'])*df_markers.loc[s_index,'exposure']
+        a_zero = (a_sub.clip(min=0)).astype(int) #max=a_sub.max() #took out max parameter from np.clip, but it was fine in
+        if b_8bit:
+            #a_16bit = skimage.img_as_ubyte(a_zero)
+            #a_zero = a_sub.clip(min=0,max=a_sub.max())
+            a_bit = (a_zero/256).astype(np.uint8)
+        else:
+            a_bit = skimage.img_as_uint(a_zero)
+        s_fname = f'{subdir}/{s_index.split(s_color)[0]}_Sub{df_subtract.loc[df_markers.loc[s_index,"sub_image"],"marker"]}{s_color}{s_index.split(s_color)[1]}'
+        skimage.io.imsave(s_fname,a_bit)
+    
+    return(df_markers,df_copy)#df_markers,es_subtract
+
+def subtract_scaled_images(df_img,d_late={'c2':'R5Qc2','c3':'R5Qc3','c4':'R5Qc4','c5':'R5Qc5'},d_early={'c2':'R0c2','c3':'R0c3','c4':'R0c4','c5':'R0c5'},ls_exclude=[],subdir='SubtractedRegisteredImages',b_8bit=False):
+    """
+    This code loads 16 bit grayscale tiffs, performs scaled AF subtraction 
+    based on the round position between early and late AF channels/rounds defined by the user,
+    and outputs  AF subtracted tiffs  or ome-tiffs for visualization.
+    The data required is:
+    1. The RoundsCyclesTable with real exposure times
+    2. dataframe of images to process (df_img); can be created with any custom parsing function
+        df_img = dataframe of images with columns [ 'color', 'exposure', 'marker','round_ord']
+            and index with image names
+        d_channel = dictionary mapping color to marker to subtract
+        ls_exclude = lost of markers not needing subtraction
+    """
+    #generate dataframe of subtraction markers 
+    es_subtract = set()
+    [es_subtract.add(item) for key, item in d_early.items()]
+    [es_subtract.add(item) for key, item in d_late.items()]
+    
+    #markers minus the subtraction markers & excluded markers
+    es_markers = set(df_img.marker) - es_subtract
+    #dataframe of markers
+    df_markers = df_img[df_img.loc[:,'marker'].isin(es_markers)]
+    df_copy = df_img[df_img.marker.isin(ls_exclude)]
+    df_markers = df_markers[~df_markers.marker.isin(ls_exclude)]
+    
+    #copy excluded markers
+    for s_file in df_copy.index.tolist():
+        print(s_file)
+        print(f'copied to {subdir}/{s_file}')
+        shutil.copyfile(s_file,f'{subdir}/{s_file}')
+
+    #add columns with mapping of proper AF images to marker images
+    for s_index in df_markers.index.tolist():
+        print('add colums')
+        print(s_index)
+        s_scene = df_markers.loc[s_index,'scene']
+        s_color = df_markers.loc[s_index,'color']
+        s_early = d_early[s_color]
+        s_late = d_late[s_color]
+        i_round = df_markers.loc[s_index,'round_num']
+        df_scene = df_img[df_img.scene==s_scene]
+        if len(df_scene[df_scene.marker==s_early]) == 0:
+            print(f' Missing early AF channel for {s_scene} {s_color}')
+        elif len(df_scene[df_scene.marker==s_late]) == 0:
+            print(f' Missing late AF channel for {s_scene} {s_color}')
+        else:
+            i_early = df_scene[(df_scene.marker==s_early)].round_num[0]
+            i_late = df_scene[(df_scene.marker==s_late)].round_num[0]
+            df_markers.loc[s_index,'sub_name'] = f'{s_early}{s_late}'
+            df_markers.loc[s_index,'sub_early'] = df_scene[(df_scene.marker==s_early)].index[0]
+            df_markers.loc[s_index,'sub_early_exp'] = df_scene[(df_scene.marker==s_early)].exposure[0]
+            df_markers.loc[s_index,'sub_late'] = df_scene[(df_scene.marker==s_late)].index[0]
+            df_markers.loc[s_index,'sub_late_exp'] = df_scene[(df_scene.marker==s_late)].exposure[0]
+            df_markers.loc[s_index,'sub_ratio_late'] = np.clip((i_round-i_early)/(i_late - i_early),0,1)
+            df_markers.loc[s_index,'sub_ratio_early'] = np.clip(1 - (i_round-i_early)/(i_late - i_early),0,1)
+
+    #loop to subtract
+    for s_index in df_markers.index.tolist():
+        print(f'Processing {s_index}')
+        s_color = '_' + df_markers.loc[s_index,'color'] + '_'
+        a_img = skimage.io.imread(s_index)
+        a_early = skimage.io.imread(df_markers.loc[s_index,'sub_early'])
+        a_late = skimage.io.imread(df_markers.loc[s_index,'sub_late'])
+        #divide each image by exposure time
+        a_img_exp = a_img/df_markers.loc[s_index,'exposure']
+        a_early_exp = a_early/df_markers.loc[s_index,'sub_early_exp']
+        a_late_exp = a_late/df_markers.loc[s_index,'sub_late_exp']
+        #combine early and late based on round_num
+        a_early_exp = a_early_exp * df_markers.loc[s_index,'sub_ratio_early']
+        a_late_exp = a_late_exp * df_markers.loc[s_index,'sub_ratio_late']
+        #subtract 1 ms AF from 1 ms signal
+        #multiply by original image exposure time
+        a_sub = (a_img_exp - a_early_exp - a_late_exp)*df_markers.loc[s_index,'exposure']
+        a_zero = (a_sub.clip(min=0)).astype(int) #
+        if b_8bit:
+            a_bit = (a_zero/256).astype(np.uint8)
+        else:
+            a_bit = skimage.img_as_uint(a_zero)
+        s_fname = f'{subdir}/{s_index.split(s_color)[0]}_Sub{df_markers.loc[s_index,"sub_name"]}{s_color}{s_index.split(s_color)[1]}'
+        skimage.io.imsave(s_fname,a_bit)
+    
+    return(df_markers,df_copy)
+
+def overlay_crop(d_combos,d_crop,df_img,s_dapi,tu_dim=(1000,1000),b_8bit=True): 
+    """
+    output custon multi page tiffs according to dictionary, with s_dapi as channel 1 in each overlay
+    BUG with 53BP1
+    d_crop : {slide_scene : (x,y) coord
+    tu_dim = (width, height)
+    d_combos = {'Immune':{'CD45', 'PD1', 'CD8', 'CD4', 'CD68', 'FoxP3','GRNZB','CD20','CD3'},
+    'Stromal':{'Vim', 'aSMA', 'PDPN', 'CD31', 'ColIV','ColI'},
+    'Differentiation':{'CK19', 'CK7','CK5', 'CK14', 'CK17','CK8'},
+    'Tumor':{'HER2', 'Ecad', 'ER', 'PgR','Ki67','PCNA'},
+    'Proliferation':{'EGFR','CD44','AR','pHH3','pRB'}, 
+    'Functional':{'pS6RP','H3K27','H3K4','cPARP','gH2AX','pAKT','pERK'},
+    'Lamins':{'LamB1','LamAC', 'LamB2'}}
+    """
+    dd_result = {}
+    for s_index in df_img.index:
+        s_marker =  df_img.loc[s_index,'marker']
+        if s_marker == 'DAPI':
+            s_marker = s_marker + f'{df_img.loc[s_index,"rounds"].split("R")[1]}'
+        df_img.loc[s_index,'marker'] = s_marker
+    #now make overlays
+    for s_scene, xy_cropcoor in d_crop.items():
+        d_result = {}
+        print(f'Processing {s_scene}')
+        df_slide = df_img[df_img.scene==s_scene]
+        s_image_round = df_slide[df_slide.marker==s_dapi].index[0]
+        if len(df_slide[df_slide.marker==s_dapi.split('_')[0]].index) == 0:
+            print('Error: dapi not found')
+        elif len(df_slide[df_slide.marker==s_dapi.split('_')[0]].index) > 1:
+            print('Error: too many dapi images found')
+        else:
+            print(s_image_round)
+        #exclude any missing biomarkers
+        es_all = set(df_slide.marker)
+        #iterate over overlay combinations
+        for s_type, es_combos in d_combos.items():
+            d_overlay = {}
+            es_combos_shared = es_combos.intersection(es_all)
+            for idx, s_combo in enumerate(sorted(es_combos_shared)):
+                s_filename = (df_slide[df_slide.marker==s_combo]).index[0]
+                if len((df_slide[df_slide.marker==s_combo]).index) == 0:
+                    print(f'Error: {s_combo} not found')
+                elif len((df_slide[df_slide.marker==s_combo]).index) > 1:
+                    print(f'\n Warning {s_combo}: too many marker images found, used {s_filename}')
+                else:
+                    print(f'{s_combo}: {s_filename}')
+                d_overlay.update({s_combo:s_filename})
+            #d_overlay.update({s_dapi:s_image_round})
+            a_dapi = skimage.io.imread(s_image_round)
+            #crop 
+            a_crop = a_dapi[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+            a_overlay = np.zeros((len(d_overlay) + 1,a_crop.shape[0],a_crop.shape[1]),dtype=np.uint8)
+            if a_crop.dtype == 'uint16':
+                if b_8bit:
+                    a_crop = (a_crop/256).astype(np.uint8)
+                else:
+                    a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=(0,1.5*np.quantile(a_crop,0.9999)))
+                    a_crop = (a_rescale/256).astype(np.uint8)
+                    print(f'rescale intensity')
+            a_overlay[0,:,:] = a_crop
+            ls_biomarker_all = [s_dapi]
+            for i, s_color in enumerate(sorted(d_overlay.keys())):
+                s_overlay= d_overlay[s_color]
+                ls_biomarker_all.append(s_color)
+                a_channel = skimage.io.imread(s_overlay)
+                #crop 
+                a_crop = a_channel[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+                if a_crop.dtype == 'uint16':
+                    if b_8bit:
+                        a_crop = (a_crop/256).astype(np.uint8)
+                    else:
+                        a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=(0,1.5*np.quantile(a_crop,0.9999)))
+                        a_crop = (a_rescale/256).astype(np.uint8)
+                        print(f'rescale intensity')
+                a_overlay[i + 1,:,:] = a_crop
+            d_result.update({s_type:(ls_biomarker_all,a_overlay)})
+        dd_result.update({f'{s_scene}_x{xy_cropcoor[0]}y{xy_cropcoor[1]}':d_result})
+        return(dd_result)
+
+def gen_xml(array, channel_names):
+    '''
+    copy and modify from apeer ome tiff
+    build OME-XML metadata for a 5 dimensional image array
+    array = image array; its shape is read as (T, Z, C, Y, X)
+    channel_names = channel names, indexed by channel number
+    return = metadata.to_xml().encode()
+    raises AssertionError if array does not have 5 dimensions
+    NOTE(review): omexmlClass is used below, but its import
+    (from apeer_ometiff_library import omexmlClass) is commented out in the
+    import section at the top of this file - confirm it is in scope before calling
+    '''
+    #for idx, s_marker in enumerate(ls_marker):
+    #    old = bytes(f'Name="C:{idx}"','utf-8')
+    #    new = bytes(f'Name="{s_marker}"','utf-8')
+    #    s_xml = s_xml.replace(old,new,-1)
+    #Dimension order is assumed to be TZCYX
+    dim_order = "TZCYX"
+    
+    metadata = omexmlClass.OMEXML()
+    shape = array.shape
+    assert ( len(shape) == 5), "Expected array of 5 dimensions"
+    
+    metadata.image().set_Name("IMAGE")
+    metadata.image().set_ID("0")
+    
+    pixels = metadata.image().Pixels
+    pixels.ome_uuid = metadata.uuidStr
+    pixels.set_ID("0")
+    
+    #number of channels = third axis of the array
+    pixels.channel_count = shape[2]
+    
+    #sizes follow the TZCYX axis order of the array
+    pixels.set_SizeT(shape[0])
+    pixels.set_SizeZ(shape[1])
+    pixels.set_SizeC(shape[2])
+    pixels.set_SizeY(shape[3])
+    pixels.set_SizeX(shape[4])
+    
+    #the dimension order is written reversed ("XYCZT")
+    pixels.set_DimensionOrder(dim_order[::-1])
+    
+    pixels.set_PixelType(omexmlClass.get_pixel_type(array.dtype))
+    
+    #name each channel after the matching entry of channel_names
+    for i in range(pixels.SizeC):
+        pixels.Channel(i).set_ID("Channel:0:" + str(i))
+        pixels.Channel(i).set_Name(channel_names[i])
+    
+    for i in range(pixels.SizeC):
+        pixels.Channel(i).set_SamplesPerPixel(1)
+        
+    pixels.populate_TiffData()
+    
+    return metadata.to_xml().encode()
+
+def array_img(df_img,s_xlabel='color',ls_ylabel=['rounds','exposure'],s_title='marker',tu_array=(2,4),tu_fig=(10,20),cmap='gray',d_crop={}):
+    """
+    create a grid of images
+    df_img = dataframe of images with columns having image attributes
+        and index with image names
+    s_xlabel = coumns of grid
+    ls_ylabel = y label 
+    s_title= title
+
+    """
+     
+    fig, ax = plt.subplots(tu_array[0],tu_array[1],figsize=tu_fig)
+    ax = ax.ravel()
+    for ax_num, s_index in enumerate(df_img.index):
+        s_row_label = f'{df_img.loc[s_index,ls_ylabel[0]]}\n {df_img.loc[s_index,ls_ylabel[1]]}'
+        s_col_label = df_img.loc[s_index,s_xlabel]
+        a_image=skimage.io.imread(s_index)
+        s_label_img = df_img.loc[s_index,s_title]
+        a_rescale = skimage.exposure.rescale_intensity(a_image,in_range=(0,1.5*np.quantile(a_image,0.98)))
+        if len(d_crop)!= 0:
+            tu_crop = d_crop[df_img.loc[s_index,'scene']]
+            a_rescale = a_rescale[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        ax[ax_num].imshow(a_rescale,cmap=cmap)
+        ax[ax_num].set_title(s_label_img)
+        ax[ax_num].set_ylabel(s_row_label)
+        ax[ax_num].set_xlabel(f'{s_col_label}\n 0 - {int(1.5*np.quantile(a_image,0.98))}')
+    plt.tight_layout()
+    return(fig)
+
+def array_roi(df_img,s_column='color',s_row='rounds',s_label='marker',tu_crop=(0,0,100,100),tu_array=(2,4),tu_fig=(10,20), cmap='gray',b_min_label=True,tu_rescale=(0,0)):
+    """
+    create a grid of images
+    df_img = dataframe of images with columns having image attributes
+        and index with image names
+    s_column = coumns of grid
+    s_row = rows of grid
+    s_label= attribute to label axes
+    tu_crop = (upper left corner x,  y , xlength, yheight)
+    tu_dim = a tumple of x and y dimensinons of crop
+    """
+     
+    fig, ax = plt.subplots(tu_array[0],tu_array[1],figsize=tu_fig,sharex=True, sharey=True) 
+    if b_min_label:
+        fig, ax = plt.subplots(tu_array[0],tu_array[1],figsize=tu_fig, sharey=True) 
+    ax = ax.ravel()
+    for ax_num, s_index in enumerate(df_img.index):
+        s_row_label = df_img.loc[s_index,s_row]
+        s_col_label = df_img.loc[s_index,s_column]
+        s_label_img = df_img.loc[s_index,s_label]
+        #load image, copr, rescale
+        a_image=skimage.io.imread(s_index)
+        a_crop = a_image[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        if tu_rescale==(0,0):
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=(0,np.quantile(a_image,0.98)+np.quantile(a_image,0.98)/2))
+            tu_max = (0,np.quantile(a_image,0.98)+np.quantile(a_image,0.98)/2)
+            ax[ax_num].imshow(a_rescale,cmap='gray')
+        else:
+            print(f'original {a_crop.min()},{a_crop.max()}')
+            print(f'rescale to {tu_rescale}')
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=tu_rescale,out_range=tu_rescale)
+            tu_max=tu_rescale
+            ax[ax_num].imshow(a_rescale,cmap=cmap,vmin=0, vmax=tu_max[1])
+        ax[ax_num].set_title(s_label_img)
+        ax[ax_num].set_ylabel(s_row_label)
+        ax[ax_num].set_xlabel(s_col_label)
+        if b_min_label:
+            ax[ax_num].set_xticklabels('')
+            ax[ax_num].set_xlabel(f'{tu_max[0]} - {int(tu_max[1])}') #min/max = 
+    plt.tight_layout()
+    return(fig)
+
+def load_labels(d_crop,segdir,s_find='Nuclei Segmentation Basins'):
+    """
+    load the segmentation basins (cell of nuceli) 
+    s_find: 'exp5_CellSegmentationBasins' or 'Nuclei Segmentation Basins'
+    """
+    d_label={}
+    cwd = os.getcwd()
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        s_sample = s_scene.split('-Scene-')[0]
+        os.chdir(f'{segdir}')
+        for s_file in os.listdir():
+            if s_file.find(s_find) > -1: #Nuclei Segmentation Basins.tif #Cell Segmentation Basins.tif
+                if s_file.find(s_scene.split(s_sample)[1]) > -1:
+                    print(f'loading {s_file}')
+                    a_seg = skimage.io.imread(s_file)
+                    d_label.update({s_scene:a_seg})
+    os.chdir(cwd)
+    return(d_label)
+
+def crop_labels(d_crop,d_label,tu_dim,cropdir,s_name='Nuclei Segmentation Basins'):
+    """
+    crop the segmentation basins (cell of nuceli) to same coord as images for veiwing in Napari
+    s_name = 
+    """
+    for s_scene, xy_cropcoor in d_crop.items():
+        print(s_scene)
+        a_seg = d_label[s_scene]
+        a_crop = a_seg[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+        s_coor = f'x{xy_cropcoor[0]}y{xy_cropcoor[1]}.tif'
+        #crop file
+        s_file_new = f'{cropdir}/{s_scene}_{s_name.replace(" ","")}{s_coor}'
+        print(s_file_new)
+        skimage.io.imsave(s_file_new,a_crop)
+
+
+def fmt(x, pos):
+    a, b = '{:.0e}'.format(x).split('e')
+    b = int(b)
+    return r'${} \times 10^{{{}}}$'.format(a, b)
+
+def array_roi_if(df_img,df_dapi,s_label='rounds',s_title='Title',tu_crop=(0,0,100,100),tu_array=(2,4),tu_fig=(10,20),tu_rescale=(0,0),i_expnorm=0,i_micron_per_pixel=.325):
+    """
+    create a grid of images
+    df_img = dataframe of images with columns having image attributes
+        and index with image names
+    df_dapi = like df_img, but with the matching dapi images
+    s_label= attribute to label axes
+    s_title = x axis title
+    tu_crop = (upper left corner x,  y , xlength, yheight)
+    tu_array = subplot array dimensions
+    tu_fig = size of figue
+    tu_rescale= range of rescaling
+    i_expnorm = normalize to an exposure time (requires 'exposure' column in dataframe
+    """
+    cmap = mpl.colors.LinearSegmentedColormap.from_list('cmap', [(0,0,0),(0,1,0)], N=256, gamma=1.0)
+    fig, ax = plt.subplots(tu_array[0],tu_array[1],figsize=tu_fig,sharey=True, squeeze=False) #
+    ax = ax.ravel()
+    for ax_num, s_index in enumerate(df_img.index):
+        s_col_label = df_img.loc[s_index,s_label]
+        #load image, copr, rescale
+        a_image=skimage.io.imread(s_index)
+        a_dapi = skimage.io.imread((df_dapi).index[0])# & (df_dapi.rounds=='R1')
+        a_crop = a_image[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        a_crop_dapi = a_dapi[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        #a_crop_dapi = (a_crop_dapi/255).astype('int')
+        if i_expnorm > 0:
+            a_crop = a_crop/df_img.loc[s_index,'exposure']*i_expnorm
+        if tu_rescale==(0,0):
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=(np.quantile(a_crop,0.03),1.5*np.quantile(a_crop,0.998)),out_range=(0, 255))
+            tu_max = (np.quantile(a_crop,0.03),1.5*np.quantile(a_crop,0.998))
+        else:
+            #print(f'original {a_crop.min()},{a_crop.max()}')
+            #print(f'rescale to {tu_rescale}')
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range = tu_rescale,out_range=(0,255))
+            tu_max=tu_rescale
+        a_rescale_dapi = skimage.exposure.rescale_intensity(a_crop_dapi,in_range = (np.quantile(a_crop_dapi,0.03),2*np.quantile(a_crop_dapi,0.99)),out_range=(0,255)) 
+        a_rescale_dapi = a_rescale_dapi.astype(np.uint8)
+        a_rescale = a_rescale.astype(np.uint8)
+        #2 color png
+        zdh = np.dstack((np.zeros_like(a_rescale), a_rescale, a_rescale_dapi))
+        ax[ax_num].imshow(zdh)
+        ax[ax_num].set_title('')
+        ax[ax_num].set_ylabel('')
+        ax[ax_num].set_xlabel(s_col_label,fontsize = 'x-large')
+        if tu_rescale == (0,0):
+            if len(ax)>1:
+                ax[ax_num].set_xlabel(f'{s_col_label} ({int(np.quantile(a_crop,0.03))} - {int(1.5*np.quantile(a_crop,0.998))})')
+        ax[ax_num].set_xticklabels('')
+    #pixel to micron (apply after ax is returned)
+    #ax[0].set_yticklabels([str(int(re.sub(u"\u2212", "-", item.get_text()))*i_micron_per_pixel) for item in ax[0].get_yticklabels(minor=False)])
+    plt.suptitle(s_title,y=0.93,size = 'xx-large',weight='bold')
+    plt.subplots_adjust(wspace=.05, hspace=.05)
+    # Now adding the colorbar
+    norm = mpl.colors.Normalize(vmin=tu_max[0],vmax=tu_max[1])
+    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+    sm.set_array([])
+    if len(ax) == 1:
+        cbaxes = fig.add_axes([.88, 0.125, 0.02, 0.75]) #[left, bottom, width, height]
+        plt.colorbar(sm, cax=cbaxes)#,format=ticker.FuncFormatter(fmt))
+        plt.figtext(0.47,0.03,s_label.replace('_',' '),fontsize = 'x-large', weight='bold')
+    elif tu_rescale != (0,0):
+        cbaxes = fig.add_axes([.91, 0.15, 0.015, 0.7]) #[left, bottom, width, height]
+        plt.colorbar(sm, cax=cbaxes)#,format=ticker.FuncFormatter(fmt))
+        plt.figtext(0.42,0.03,s_label.replace('_',' '),fontsize = 'x-large', weight='bold')
+    else:
+        print("Different ranges - can't use colorbar") 
+        plt.figtext(0.43,0.03,s_label.replace('_',' '),fontsize = 'x-large', weight='bold')
+
+    return(fig,ax) 
+
+def multicolor_png(df_img,df_dapi,s_scene,d_overlay,d_crop,es_dim={'CD8','FoxP3','ER','AR'},es_bright={'Ki67','pHH3'},low_thresh=4000,high_thresh=0.999):
+    '''
+    create RGB image with Dapi plus four - 6 channels
+    '''
+
+    d_result = {}
+    #print(s_scene)
+    tu_crop = d_crop[s_scene]
+    df_slide = df_img[df_img.scene == s_scene]
+    x=tu_crop[1]
+    y=tu_crop[0]
+    img_dapi = skimage.io.imread(df_dapi[df_dapi.scene==s_scene].path[0])
+    a_crop = img_dapi[x:x+800,y:y+800]
+    a_rescale_dapi = skimage.exposure.rescale_intensity(a_crop,in_range=(np.quantile(img_dapi,0.2),1.5*np.quantile(img_dapi,high_thresh)),out_range=(0, 255))
+    if 1.5*np.quantile(img_dapi,high_thresh) < low_thresh:
+                a_rescale_dapi = skimage.exposure.rescale_intensity(a_crop,in_range=(low_thresh/2,low_thresh),out_range=(0, 255))
+    elif len(es_dim.intersection(set(['DAPI'])))==1:
+                new_thresh = float(str(high_thresh)[:-2])
+                a_rescale_dapi = skimage.exposure.rescale_intensity(a_crop,in_range=(np.quantile(img_dapi,0.2),1.5*np.quantile(img_dapi,new_thresh)),out_range=(0, 255))
+    elif len(es_bright.intersection(set(['DAPI'])))==1:
+                a_rescale_dapi = skimage.exposure.rescale_intensity(a_crop,in_range=(np.quantile(img_dapi,0.2),1.5*np.quantile(img_dapi,float(str(high_thresh) + '99'))),out_range=(0, 255))
+
+    #RGB
+    for s_type, ls_marker in d_overlay.items():
+        #print(s_type)
+        zdh = np.dstack((np.zeros_like(a_rescale_dapi), np.zeros_like(a_rescale_dapi),a_rescale_dapi))
+        for idx, s_marker in enumerate(ls_marker):
+            #print(s_marker)
+            s_index = df_slide[df_slide.marker == s_marker].index[0]
+            img = skimage.io.imread(df_slide.loc[s_index,'path'])
+            a_crop = img[x:x+800,y:y+800]
+            in_range = (np.quantile(a_crop,0.2),1.5*np.quantile(a_crop,high_thresh))
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=in_range,out_range=(0, 255))
+            if 1.5*np.quantile(a_crop,high_thresh) < low_thresh:
+                #print('low thresh')
+                in_range=(low_thresh/2,low_thresh)
+                a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=in_range,out_range=(0, 255))
+            elif len(es_dim.intersection(set([s_marker])))==1:
+                #print('dim')
+                new_thresh = float(str(high_thresh)[:-2])
+                in_range=(np.quantile(a_crop,0.2),1.5*np.quantile(a_crop,new_thresh))
+                a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=in_range,out_range=(0, 255))
+            elif len(es_bright.intersection(set([s_marker])))==1:
+                #print('bright')
+                in_range=(np.quantile(a_crop,0.2),1.5*np.quantile(a_crop,float(str(high_thresh) + '99')))
+                a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=in_range,out_range=(0, 255))
+
+            #print(f'low {int(in_range[0])} high {int(in_range[1])}')
+            if idx == 0:
+                zdh = zdh + np.dstack((np.zeros_like(a_rescale), a_rescale,np.zeros_like(a_rescale)))
+
+            elif idx == 1:
+                zdh = zdh + np.dstack((a_rescale, a_rescale,np.zeros_like(a_rescale)))
+
+            elif idx == 2:
+                zdh = zdh + np.dstack((a_rescale, np.zeros_like(a_rescale),np.zeros_like(a_rescale) ))
+
+            elif idx == 3:
+                zdh = zdh + np.dstack((np.zeros_like(a_rescale), a_rescale, a_rescale))
+        #print(zdh.min())
+        zdh = zdh.clip(0,255)
+        zdh = zdh.astype('uint8')
+        #print(zdh.max())
+        d_result.update({s_type:(ls_marker,zdh)})
+    return(d_result)
+
+def roi_if_border(df_img,df_dapi,df_border,s_label='rounds',s_title='Title',tu_crop=(0,0,100,100),tu_array=(2,4),tu_fig=(10,20),tu_rescale=(0,0),i_expnorm=0,i_micron_per_pixel=.325):
+    """
+    create a grid of images
+    df_img = dataframe of images with columns having image attributes
+        and index with image names
+    df_dapi = like df_img, but with the matching dapi images
+    df_border: index is border image file name
+    s_label= attribute to label axes
+    s_title = x axis title
+    tu_crop = (upper left corner x,  y , xlength, yheight)
+    tu_array = subplot array dimensions
+    tu_fig = size of figue
+    tu_rescale= 
+    i_expnorm = 
+    """
+    cmap = mpl.colors.LinearSegmentedColormap.from_list('cmap', [(0,0,0),(0,1,0)], N=256, gamma=1.0)
+    fig, ax = plt.subplots(tu_array[0],tu_array[1],figsize=tu_fig,sharey=True, squeeze=False) #
+    ax = ax.ravel()
+    for ax_num, s_index in enumerate(df_img.index):
+        s_col_label = df_img.loc[s_index,s_label]
+        #load image, copr, rescale
+        a_image=skimage.io.imread(s_index)
+        a_dapi = skimage.io.imread((df_dapi).index[0])# & (df_dapi.rounds=='R1')
+        a_crop = a_image[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        a_crop_dapi = a_dapi[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        #a_crop_dapi = (a_crop_dapi/255).astype('int')
+        if i_expnorm > 0:
+            a_crop = a_crop/df_img.loc[s_index,'exposure']*i_expnorm
+        if tu_rescale==(0,0):
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range=(np.quantile(a_crop,0.03),1.5*np.quantile(a_crop,0.998)),out_range=(0, 255))
+            tu_max = (np.quantile(a_crop,0.03),1.5*np.quantile(a_crop,0.998))
+        else:
+            print(f'original {a_crop.min()},{a_crop.max()}')
+            print(f'rescale to {tu_rescale}')
+            a_rescale = skimage.exposure.rescale_intensity(a_crop,in_range = tu_rescale,out_range=(0,255))
+            tu_max=tu_rescale
+        a_rescale_dapi = skimage.exposure.rescale_intensity(a_crop_dapi,in_range = (np.quantile(a_crop_dapi,0.03),2*np.quantile(a_crop_dapi,0.99)),out_range=(0,255)) 
+        a_rescale_dapi = a_rescale_dapi.astype(np.uint8)
+        a_rescale = a_rescale.astype(np.uint8)
+        #white border
+        s_border_index = df_border[df_border.marker==(df_img.loc[s_index,'marker'])].index[0]
+        a_border = skimage.io.imread(s_border_index)
+        a_crop_border = a_border[(tu_crop[1]):(tu_crop[1]+tu_crop[3]),(tu_crop[0]):(tu_crop[0]+tu_crop[2])]
+        mask = a_crop_border > 250
+        #2 color png
+        zdh = np.dstack((np.zeros_like(a_rescale), a_rescale, a_rescale_dapi))
+        zdh[mask] = 255
+        #zdh = zdh.clip(0,255)
+        #zdh = zdh.astype('uint8')
+        ax[ax_num].imshow(zdh)
+        ax[ax_num].set_title('')
+        ax[ax_num].set_ylabel('')
+        ax[ax_num].set_xlabel(s_col_label,fontsize = 'x-large')
+        if tu_rescale == (0,0):
+            if len(ax)>1:
+                ax[ax_num].set_xlabel(f'{s_col_label} ({int(np.quantile(a_crop,0.03))} - {int(1.5*np.quantile(a_crop,0.998))})')
+        ax[ax_num].set_xticklabels('')
+    #pixel to micron (apply after ax is returned)
+    #ax[0].set_yticklabels([str(int(re.sub(u"\u2212", "-", item.get_text()))*i_micron_per_pixel) for item in ax[0].get_yticklabels(minor=False)])
+    plt.suptitle(s_title,y=0.93,size = 'xx-large',weight='bold')
+    plt.subplots_adjust(wspace=.05, hspace=.05)
+    # Now adding the colorbar
+    norm = mpl.colors.Normalize(vmin=tu_max[0],vmax=tu_max[1])
+    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+    sm.set_array([])
+    if len(ax) == 1:
+        cbaxes = fig.add_axes([.88, 0.125, 0.02, 0.75]) #[left, bottom, width, height]
+        plt.colorbar(sm, cax = cbaxes)
+        plt.figtext(0.47,0.03,s_label.replace('_',' '),fontsize = 'x-large', weight='bold')
+    elif tu_rescale != (0,0):
+        cbaxes = fig.add_axes([.92, 0.175, 0.02, 0.64]) #[left, bottom, width, height]
+        plt.colorbar(sm, cax = cbaxes)
+        plt.figtext(0.42,0.03,s_label.replace('_',' '),fontsize = 'x-large', weight='bold')
+    else:
+        print("Different ranges - can't use colorbar") 
+        plt.figtext(0.43,0.03,s_label.replace('_',' '),fontsize = 'x-large', weight='bold')
+
+    return(fig,ax,a_crop_border) 
+
diff --git a/mplex_image/normalize.py b/mplex_image/normalize.py
new file mode 100755
index 0000000..2c03147
--- /dev/null
+++ b/mplex_image/normalize.py
@@ -0,0 +1,536 @@
+#from https://github.com/brentp/combat.py/blob/master/combat.py
+import patsy
+import sys
+import numpy.linalg as la
+import numpy as np
+import pandas as pd
+import sys
+import matplotlib.pyplot as plt
+
+def aprior(gamma_hat):
+    m = gamma_hat.mean()
+    s2 = gamma_hat.var()
+    return (2 * s2 +m**2) / s2
+
+def bprior(gamma_hat):
+    m = gamma_hat.mean()
+    s2 = gamma_hat.var()
+    return (m*s2+m**3)/s2
+
+def it_sol(sdat, g_hat, d_hat, g_bar, t2, a, b, conv=0.0001):
+    n = (1 - np.isnan(sdat)).sum(axis=1)
+    g_old = g_hat.copy()
+    d_old = d_hat.copy()
+
+    change = 1
+    count = 0
+    while change > conv:
+        #print g_hat.shape, g_bar.shape, t2.shape
+        g_new = postmean(g_hat, g_bar, n, d_old, t2)
+        sum2 = ((sdat - np.dot(g_new.values.reshape((g_new.shape[0], 1)), np.ones((1, sdat.shape[1])))) ** 2).sum(axis=1)
+        d_new = postvar(sum2, n, a, b)
+       
+        change = max((abs(g_new - g_old) / g_old).max(), (abs(d_new - d_old) / d_old).max())
+        g_old = g_new #.copy()
+        d_old = d_new #.copy()
+        count = count + 1
+    adjust = (g_new, d_new)
+    return adjust 
+
+def postmean(g_hat, g_bar, n, d_star, t2):
+    return (t2*n*g_hat+d_star * g_bar) / (t2*n+d_star)
+
+def postvar(sum2, n, a, b):
+    return (0.5 * sum2 + b) / (n / 2.0 + a - 1.0)
+
+def design_mat(mod, numerical_covariates, batch_levels):
+    # require levels to make sure they are in the same order as we use in the
+    # rest of the script.
+    design = patsy.dmatrix("~ 0 + C(batch, levels=%s)" % str(batch_levels),
+                                                  mod, return_type="dataframe")
+
+    mod = mod.drop(["batch"], axis=1)
+    numerical_covariates = list(numerical_covariates)
+    sys.stderr.write("found %i batches\n" % design.shape[1])
+    other_cols = [c for i, c in enumerate(mod.columns)
+                  if not i in numerical_covariates]
+    factor_matrix = mod[other_cols]
+    design = pd.concat((design, factor_matrix), axis=1)
+    if numerical_covariates is not None:
+        sys.stderr.write("found %i numerical covariates...\n"
+                            % len(numerical_covariates))
+        for i, nC in enumerate(numerical_covariates):
+            cname = mod.columns[nC]
+            sys.stderr.write("\t{0}\n".format(cname))
+            design[cname] = mod[mod.columns[nC]]
+    sys.stderr.write("found %i categorical variables:" % len(other_cols))
+    sys.stderr.write("\t" + ", ".join(other_cols) + '\n')
+    return design
+
+def combat(data, batch, model=None, numerical_covariates=None):
+    """Correct for batch effects in a dataset
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the expression or methylation
+        data to batch correct
+    batch : pandas.Series
+        A column corresponding to the batches in the data, with index same as
+        the columns that appear in ``data``
+    model : patsy.design_info.DesignMatrix, optional
+        A model matrix describing metadata on the samples which could be
+        causing batch effects. If not provided, then will attempt to coarsely
+        correct just from the information provided in ``batch``
+    numerical_covariates : list-like
+        List of covariates in the model which are numerical, rather than
+        categorical
+    Returns
+    -------
+    corrected : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the batch-corrected data
+    """
+    if isinstance(numerical_covariates, str):
+        numerical_covariates = [numerical_covariates]
+    if numerical_covariates is None:
+        numerical_covariates = []
+
+    if model is not None and isinstance(model, pd.DataFrame):
+        model["batch"] = list(batch)
+    else:
+        model = pd.DataFrame({'batch': batch})
+
+    batch_items = model.groupby("batch").groups.items()
+    batch_levels = [k for k, v in batch_items]
+    batch_info = [v for k, v in batch_items]
+    n_batch = len(batch_info)
+    n_batches = np.array([len(v) for v in batch_info])
+    n_array = float(sum(n_batches))
+
+    # drop intercept
+    drop_cols = [cname for cname, inter in  ((model == 1).all()).iteritems() if inter == True]
+    drop_idxs = [list(model.columns).index(cdrop) for cdrop in drop_cols]
+    model = model[[c for c in model.columns if not c in drop_cols]]
+    numerical_covariates = [list(model.columns).index(c) if isinstance(c, str) else c
+            for c in numerical_covariates if not c in drop_cols]
+
+    design = design_mat(model, numerical_covariates, batch_levels)
+
+    sys.stderr.write("Standardizing Data across genes.\n")
+    #error shapes (3,7200) and (26,7200) not aligned: 7200 (dim 1) != 26 (dim 0)
+    B_hat = np.dot(np.dot(la.inv(np.dot(design.T, design)), design.T), data.T) #data.T
+    grand_mean = np.dot((n_batches / n_array).T, B_hat[:n_batch,:])
+    var_pooled = np.dot(((data - np.dot(design, B_hat).T)**2), np.ones((int(n_array), 1)) / int(n_array))
+
+    stand_mean = np.dot(grand_mean.T.reshape((len(grand_mean), 1)), np.ones((1, int(n_array))))
+    tmp = np.array(design.copy())
+    tmp[:,:n_batch] = 0
+    stand_mean  += np.dot(tmp, B_hat).T
+
+    s_data = ((data - stand_mean) / np.dot(np.sqrt(var_pooled), np.ones((1, int(n_array)))))
+
+    sys.stderr.write("Fitting L/S model and finding priors\n")
+    batch_design = design[design.columns[:n_batch]]
+    gamma_hat = np.dot(np.dot(la.inv(np.dot(batch_design.T, batch_design)), batch_design.T), s_data.T)
+
+    delta_hat = []
+
+    for i, batch_idxs in enumerate(batch_info):
+        #batches = [list(model.columns).index(b) for b in batches]
+        delta_hat.append(s_data[batch_idxs].var(axis=1))
+
+    gamma_bar = gamma_hat.mean(axis=1) 
+    t2 = gamma_hat.var(axis=1)
+   
+
+    a_prior = list(map(aprior, delta_hat))
+    b_prior = list(map(bprior, delta_hat))
+
+    sys.stderr.write("Finding parametric adjustments\n")
+    gamma_star, delta_star = [], []
+    for i, batch_idxs in enumerate(batch_info):
+        #print '18 20 22 28 29 31 32 33 35 40 46'
+        #print batch_info[batch_id]
+
+        temp = it_sol(s_data[batch_idxs], gamma_hat[i],
+                     delta_hat[i], gamma_bar[i], t2[i], a_prior[i], b_prior[i])
+
+        gamma_star.append(temp[0])
+        delta_star.append(temp[1])
+
+    sys.stdout.write("Adjusting data\n")
+    bayesdata = s_data
+    gamma_star = np.array(gamma_star)
+    delta_star = np.array(delta_star)
+
+
+    for j, batch_idxs in enumerate(batch_info):
+
+        dsq = np.sqrt(delta_star[j,:])
+        dsq = dsq.reshape((len(dsq), 1))
+        denom =  np.dot(dsq, np.ones((1, n_batches[j])))
+        numer = np.array(bayesdata[batch_idxs] - np.dot(batch_design.loc[batch_idxs], gamma_star).T)
+
+        bayesdata[batch_idxs] = numer / denom
+   
+    vpsq = np.sqrt(var_pooled).reshape((len(var_pooled), 1))
+    bayesdata = bayesdata * np.dot(vpsq, np.ones((1, int(n_array)))) + stand_mean
+ 
+    return bayesdata
+
+#adapted from https://github.com/brentp/combat.py/blob/master/combat.py
+
+
+def combat_fit(data, batch, model=None, numerical_covariates=None):
+    """Correct for batch effects in a dataset
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the expression or methylation
+        data to batch correct
+    batch : pandas.Series
+        A column corresponding to the batches in the data, with index same as
+        the columns that appear in ``data``
+    model : patsy.design_info.DesignMatrix, optional
+        A model matrix describing metadata on the samples which could be
+        causing batch effects. If not provided, then will attempt to coarsely
+        correct just from the information provided in ``batch``
+    numerical_covariates : list-like
+        List of covariates in the model which are numerical, rather than
+        categorical
+    Returns
+    -------
+    gamma_star : centering parameters from combat fitting
+    delta_star : scaling parameters from combat fitting
+    stand_mean: pooled mean of batches
+    var_pooled: pooled variance of batches
+    """
+    if isinstance(numerical_covariates, str):
+        numerical_covariates = [numerical_covariates]
+    if numerical_covariates is None:
+        numerical_covariates = []
+
+    if model is not None and isinstance(model, pd.DataFrame):
+        model["batch"] = list(batch)
+    else:
+        model = pd.DataFrame({'batch': batch})
+
+    batch_items = model.groupby("batch").groups.items()
+    batch_levels = [k for k, v in batch_items]
+    batch_info = [v for k, v in batch_items]
+    n_batch = len(batch_info)
+    n_batches = np.array([len(v) for v in batch_info])
+    n_array = float(sum(n_batches))
+
+    # drop intercept
+    drop_cols = [cname for cname, inter in  ((model == 1).all()).iteritems() if inter == True]
+    drop_idxs = [list(model.columns).index(cdrop) for cdrop in drop_cols]
+    model = model[[c for c in model.columns if not c in drop_cols]]
+    numerical_covariates = [list(model.columns).index(c) if isinstance(c, str) else c
+            for c in numerical_covariates if not c in drop_cols]
+
+    design = design_mat(model, numerical_covariates, batch_levels)
+
+    sys.stderr.write("Standardizing Data across genes.\n")
+    B_hat = np.dot(np.dot(la.inv(np.dot(design.T, design)), design.T), data.T) 
+    grand_mean = np.dot((n_batches / n_array).T, B_hat[:n_batch,:])
+    var_pooled = np.dot(((data - np.dot(design, B_hat).T)**2), np.ones((int(n_array), 1)) / int(n_array))
+
+    stand_mean = np.dot(grand_mean.T.reshape((len(grand_mean), 1)), np.ones((1, int(n_array))))
+    tmp = np.array(design.copy())
+    tmp[:,:n_batch] = 0
+    stand_mean  += np.dot(tmp, B_hat).T
+
+    s_data = ((data - stand_mean) / np.dot(np.sqrt(var_pooled), np.ones((1, int(n_array)))))
+
+    sys.stderr.write("Fitting L/S model and finding priors\n")
+    batch_design = design[design.columns[:n_batch]]
+    gamma_hat = np.dot(np.dot(la.inv(np.dot(batch_design.T, batch_design)), batch_design.T), s_data.T)
+
+    delta_hat = []
+
+    for i, batch_idxs in enumerate(batch_info):
+        delta_hat.append(s_data[batch_idxs].var(axis=1))
+
+    gamma_bar = gamma_hat.mean(axis=1) 
+    t2 = gamma_hat.var(axis=1)
+
+
+    a_prior = list(map(aprior, delta_hat))
+    b_prior = list(map(bprior, delta_hat))
+
+    sys.stderr.write("Finding parametric adjustments\n")
+    gamma_star, delta_star = [], []
+    for i, batch_idxs in enumerate(batch_info):
+        temp = it_sol(s_data[batch_idxs], gamma_hat[i],
+                     delta_hat[i], gamma_bar[i], t2[i], a_prior[i], b_prior[i])
+
+        gamma_star.append(temp[0])
+        delta_star.append(temp[1])
+    #just retrun one stand_mean array
+    stand_mean = stand_mean[:,0]
+    return(gamma_star, delta_star, stand_mean, var_pooled)
+        
+def combat_transform(data, batch, gamma_star, delta_star, stand_mean, var_pooled,model=None, numerical_covariates=None):
+    """Correct for batch effects in a dataset
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the expression or methylation
+        data to batch correct
+    batch : pandas.Series
+        A column corresponding to the batches in the data, with index same as
+        the columns that appear in ``data``
+    gamma_star : centering parameters from combat fitting
+    delta_star : scaling parameters from combat fitting
+    stand_mean: pooled mean of batches
+    var_pooled: pooled variance of batches
+    model : patsy.design_info.DesignMatrix, optional
+        A model matrix describing metadata on the samples which could be
+        causing batch effects. If not provided, then will attempt to coarsely
+        correct just from the information provided in ``batch``
+    numerical_covariates : list-like
+        List of covariates in the model which are numerical, rather than
+        categorical
+    Returns
+    -------
+    corrected : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the batch-corrected data
+    """
+    #get design
+    if isinstance(numerical_covariates, str):
+        numerical_covariates = [numerical_covariates]
+    if numerical_covariates is None:
+        numerical_covariates = []
+
+    if model is not None and isinstance(model, pd.DataFrame):
+        model["batch"] = list(batch)
+    else:
+        model = pd.DataFrame({'batch': batch})
+    batch_items = model.groupby("batch").groups.items()
+    batch_levels = [k for k, v in batch_items]
+    batch_info = [v for k, v in batch_items]
+    n_batch = len(batch_info)
+    n_batches = np.array([len(v) for v in batch_info])
+    n_array = float(sum(n_batches))
+    # drop intercept
+    drop_cols = [cname for cname, inter in  ((model == 1).all()).iteritems() if inter == True]
+    drop_idxs = [list(model.columns).index(cdrop) for cdrop in drop_cols]
+    model = model[[c for c in model.columns if not c in drop_cols]]
+    numerical_covariates = [list(model.columns).index(c) if isinstance(c, str) else c
+            for c in numerical_covariates if not c in drop_cols]
+    design = design_mat(model, numerical_covariates, batch_levels)
+    #standardize
+    sys.stderr.write("Standardizing Data across genes.\n")
+
+    #reshape stand mean
+    stand_mean = np.dot(stand_mean.T.reshape((len(stand_mean), 1)), np.ones((1, int(data.shape[1]))))
+    s_data = ((data - stand_mean) / np.dot(np.sqrt(var_pooled), np.ones((1, int(n_array)))))
+    batch_design = design[design.columns[:n_batch]]
+    # adjust data
+    sys.stdout.write("Adjusting data\n")
+    bayesdata = s_data
+    gamma_star = np.array(gamma_star)
+    delta_star = np.array(delta_star)
+    #for each batch
+    for j, batch_idxs in enumerate(batch_info):
+
+        dsq = np.sqrt(delta_star[j,:])
+        dsq = dsq.reshape((len(dsq), 1))
+        denom =  np.dot(dsq, np.ones((1, n_batches[j]))) #divide by sqrt delta_star
+        numer = np.array(bayesdata[batch_idxs] - np.dot(batch_design.loc[batch_idxs], gamma_star).T) #subtract gamma_star
+
+        bayesdata[batch_idxs] = numer / denom
+    #multiply by square root of variance and add mean
+    vpsq = np.sqrt(var_pooled).reshape((len(var_pooled), 1))
+    bayesdata = bayesdata * np.dot(vpsq, np.ones((1, int(n_array)))) + stand_mean
+    return bayesdata
+
+
+def combat_fit_old(data, batch, model=None, numerical_covariates=None):
+    """Correct for batch effects in a dataset
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the expression or methylation
+        data to batch correct
+    batch : pandas.Series
+        A column corresponding to the batches in the data, with index same as
+        the columns that appear in ``data``
+    model : patsy.design_info.DesignMatrix, optional
+        A model matrix describing metadata on the samples which could be
+        causing batch effects. If not provided, then will attempt to coarsely
+        correct just from the information provided in ``batch``
+    numerical_covariates : list-like
+        List of covariates in the model which are numerical, rather than
+        categorical
+    Returns
+    -------
+    gamma_star : centering parameters from combat fitting
+    delta_star : scaling parameters from combat fitting
+    """
+    if isinstance(numerical_covariates, str):
+        numerical_covariates = [numerical_covariates]
+    if numerical_covariates is None:
+        numerical_covariates = []
+
+    if model is not None and isinstance(model, pd.DataFrame):
+        model["batch"] = list(batch)
+    else:
+        model = pd.DataFrame({'batch': batch})
+
+    batch_items = model.groupby("batch").groups.items()
+    batch_levels = [k for k, v in batch_items]
+    batch_info = [v for k, v in batch_items]
+    n_batch = len(batch_info)
+    n_batches = np.array([len(v) for v in batch_info])
+    n_array = float(sum(n_batches))
+
+    # drop intercept
+    drop_cols = [cname for cname, inter in  ((model == 1).all()).iteritems() if inter == True]
+    drop_idxs = [list(model.columns).index(cdrop) for cdrop in drop_cols]
+    model = model[[c for c in model.columns if not c in drop_cols]]
+    numerical_covariates = [list(model.columns).index(c) if isinstance(c, str) else c
+            for c in numerical_covariates if not c in drop_cols]
+
+    design = design_mat(model, numerical_covariates, batch_levels)
+
+    sys.stderr.write("Standardizing Data across genes.\n")
+    B_hat = np.dot(np.dot(la.inv(np.dot(design.T, design)), design.T), data.T) 
+    grand_mean = np.dot((n_batches / n_array).T, B_hat[:n_batch,:])
+    var_pooled = np.dot(((data - np.dot(design, B_hat).T)**2), np.ones((int(n_array), 1)) / int(n_array))
+
+    stand_mean = np.dot(grand_mean.T.reshape((len(grand_mean), 1)), np.ones((1, int(n_array))))
+    tmp = np.array(design.copy())
+    tmp[:,:n_batch] = 0
+    stand_mean  += np.dot(tmp, B_hat).T
+
+    s_data = ((data - stand_mean) / np.dot(np.sqrt(var_pooled), np.ones((1, int(n_array)))))
+
+    sys.stderr.write("Fitting L/S model and finding priors\n")
+    batch_design = design[design.columns[:n_batch]]
+    gamma_hat = np.dot(np.dot(la.inv(np.dot(batch_design.T, batch_design)), batch_design.T), s_data.T)
+
+    delta_hat = []
+
+    for i, batch_idxs in enumerate(batch_info):
+        delta_hat.append(s_data[batch_idxs].var(axis=1))
+
+    gamma_bar = gamma_hat.mean(axis=1) 
+    t2 = gamma_hat.var(axis=1)
+   
+
+    a_prior = list(map(aprior, delta_hat))
+    b_prior = list(map(bprior, delta_hat))
+
+    sys.stderr.write("Finding parametric adjustments\n")
+    gamma_star, delta_star = [], []
+    for i, batch_idxs in enumerate(batch_info):
+        temp = it_sol(s_data[batch_idxs], gamma_hat[i],
+                     delta_hat[i], gamma_bar[i], t2[i], a_prior[i], b_prior[i])
+
+        gamma_star.append(temp[0])
+        delta_star.append(temp[1])
+    return(gamma_star, delta_star)
+        
+def combat_transform_old(data, batch, gamma_star, delta_star,model=None, numerical_covariates=None):
+    """Correct for batch effects in a dataset
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the expression or methylation
+        data to batch correct
+    batch : pandas.Series
+        A column corresponding to the batches in the data, with index same as
+        the columns that appear in ``data``
+    gamma_star : centering parameters from combat fitting
+    delta_star : scaling parameters from combat fitting
+    model : patsy.design_info.DesignMatrix, optional
+        A model matrix describing metadata on the samples which could be
+        causing batch effects. If not provided, then will attempt to coarsely
+        correct just from the information provided in ``batch``
+    numerical_covariates : list-like
+        List of covariates in the model which are numerical, rather than
+        categorical
+    Returns
+    -------
+    corrected : pandas.DataFrame
+        A (n_features, n_samples) dataframe of the batch-corrected data
+    """
+    #get design
+    if isinstance(numerical_covariates, str):
+        numerical_covariates = [numerical_covariates]
+    if numerical_covariates is None:
+        numerical_covariates = []
+
+    if model is not None and isinstance(model, pd.DataFrame):
+        model["batch"] = list(batch)
+    else:
+        model = pd.DataFrame({'batch': batch})
+    batch_items = model.groupby("batch").groups.items()
+    batch_levels = [k for k, v in batch_items]
+    batch_info = [v for k, v in batch_items]
+    n_batch = len(batch_info)
+    n_batches = np.array([len(v) for v in batch_info])
+    n_array = float(sum(n_batches))
+    # drop intercept
+    drop_cols = [cname for cname, inter in  ((model == 1).all()).iteritems() if inter == True]
+    drop_idxs = [list(model.columns).index(cdrop) for cdrop in drop_cols]
+    model = model[[c for c in model.columns if not c in drop_cols]]
+    numerical_covariates = [list(model.columns).index(c) if isinstance(c, str) else c
+            for c in numerical_covariates if not c in drop_cols]
+    design = design_mat(model, numerical_covariates, batch_levels)
+    #standardize
+    sys.stderr.write("Standardizing Data across genes.\n")
+    B_hat = np.dot(np.dot(la.inv(np.dot(design.T, design)), design.T), data.T) 
+    grand_mean = np.dot((n_batches / n_array).T, B_hat[:n_batch,:])
+    var_pooled = np.dot(((data - np.dot(design, B_hat).T)**2), np.ones((int(n_array), 1)) / int(n_array))
+
+    stand_mean = np.dot(grand_mean.T.reshape((len(grand_mean), 1)), np.ones((1, int(n_array))))
+    tmp = np.array(design.copy())
+    tmp[:,:n_batch] = 0
+    stand_mean  += np.dot(tmp, B_hat).T
+    s_data = ((data - stand_mean) / np.dot(np.sqrt(var_pooled), np.ones((1, int(n_array)))))
+    batch_design = design[design.columns[:n_batch]]
+    # adjust data
+    sys.stdout.write("Adjusting data\n")
+    bayesdata = s_data
+    gamma_star = np.array(gamma_star)
+    delta_star = np.array(delta_star)
+    #for each batch
+    for j, batch_idxs in enumerate(batch_info):
+
+        dsq = np.sqrt(delta_star[j,:])
+        dsq = dsq.reshape((len(dsq), 1))
+        denom =  np.dot(dsq, np.ones((1, n_batches[j]))) #divide by sqrt delta_star
+        numer = np.array(bayesdata[batch_idxs] - np.dot(batch_design.loc[batch_idxs], gamma_star).T) #subtract gamma_star
+
+        bayesdata[batch_idxs] = numer / denom
+    #multiply by square root of variance and add mean
+    vpsq = np.sqrt(var_pooled).reshape((len(var_pooled), 1))
+    bayesdata = bayesdata * np.dot(vpsq, np.ones((1, int(n_array)))) + stand_mean
+    return bayesdata
+
+def plot_histograms(df_norm,df,s_train,s_tissue):
+    '''
+    for each marker, return a histogram of trianing data and transformed data (df_norm)
+    '''
+    bins=50
+    d_fig = {}
+    for s_marker in df_norm.columns[df_norm.dtypes=='float64']:
+        print(s_marker)
+        fig,ax=plt.subplots(2,1,figsize = (3,4))
+        for idxs, s_batch in enumerate(sorted(set(df_norm.batch))):
+            df_batch = df_norm[(df_norm.batch==s_batch)].loc[:,s_marker] 
+            if len(df_batch.dropna()) == 0:
+                continue
+            ax[0].hist(df.loc[df.index.str.contains(s_batch),s_marker],bins=bins,alpha=0.4, color=f'C{idxs}')
+            ax[1].hist(df_batch,bins=bins,alpha=0.4, color=f'C{idxs}',label=s_batch)
+            ax[0].set_yscale('log')
+            ax[1].set_yscale('log')
+            ax[0].set_title(f'{s_marker.split("_")[0]}: Raw Data')
+            ax[1].set_title(f'{s_marker.split("_")[0]}: Combat')
+            ax[1].legend()
+        plt.tight_layout()
+        plt.close()
+        d_fig.update({s_marker:fig})
+    return(d_fig)
\ No newline at end of file
diff --git a/mplex_image/ometiff.py b/mplex_image/ometiff.py
new file mode 100755
index 0000000..9986c6d
--- /dev/null
+++ b/mplex_image/ometiff.py
@@ -0,0 +1,76 @@
+####
+# title: mpimage.py
+#
+# language: Python3.6
+# date: 2019-05-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to display, normalize and crop multiplex images
+####
+
+#libraries
+import matplotlib as mpl
+mpl.use('agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import skimage
+import pandas as pd
+#import bioformats 
+import re
+import shutil
+from itertools import chain
+import matplotlib.ticker as ticker
+
+os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF/')
+from apeer_ometiff_library import omexmlClass
+
+#functions
+
+def gen_xml(array, channel_names):
+    '''
+    Build an OME-XML metadata document for a 5-dimensional image array.
+    (copied and modified from the apeer ome tiff library)
+
+    array: array-like with a 5-element .shape, read as (T, Z, C, Y, X), and a .dtype
+    channel_names: sequence indexed 0..C-1; channel_names[i] becomes the name of channel i
+    returns: result of metadata.to_xml().encode()
+        (presumably the XML as bytes - confirm against omexmlClass)
+    raises: AssertionError if array does not have exactly 5 dimensions
+    '''
+    #for idx, s_marker in enumerate(ls_marker):
+    #    old = bytes(f'Name="C:{idx}"','utf-8')
+    #    new = bytes(f'Name="{s_marker}"','utf-8')
+    #    s_xml = s_xml.replace(old,new,-1)
+    #Dimension order is assumed to be TZCYX
+    dim_order = "TZCYX"
+    
+    metadata = omexmlClass.OMEXML()
+    shape = array.shape
+    assert ( len(shape) == 5), "Expected array of 5 dimensions"
+    
+    metadata.image().set_Name("IMAGE")
+    metadata.image().set_ID("0")
+    
+    pixels = metadata.image().Pixels
+    pixels.ome_uuid = metadata.uuidStr
+    pixels.set_ID("0")
+    
+    # NOTE(review): channel_count is set before the per-channel setters below are used;
+    # presumably this creates the channel entries - confirm against omexmlClass
+    pixels.channel_count = shape[2]
+    
+    #sizes are taken from the array shape in T, Z, C, Y, X order
+    pixels.set_SizeT(shape[0])
+    pixels.set_SizeZ(shape[1])
+    pixels.set_SizeC(shape[2])
+    pixels.set_SizeY(shape[3])
+    pixels.set_SizeX(shape[4])
+    
+    #the dimension order string is written reversed, i.e. "XYCZT"
+    pixels.set_DimensionOrder(dim_order[::-1])
+    
+    pixels.set_PixelType(omexmlClass.get_pixel_type(array.dtype))
+    
+    #give each channel an ID "Channel:0:<i>" and its name from channel_names
+    for i in range(pixels.SizeC):
+        pixels.Channel(i).set_ID("Channel:0:" + str(i))
+        pixels.Channel(i).set_Name(channel_names[i])
+    
+    #one sample per pixel for every channel
+    for i in range(pixels.SizeC):
+        pixels.Channel(i).set_SamplesPerPixel(1)
+        
+    pixels.populate_TiffData()
+    
+    return metadata.to_xml().encode()
diff --git a/mplex_image/preprocess.py b/mplex_image/preprocess.py
new file mode 100755
index 0000000..a54e54b
--- /dev/null
+++ b/mplex_image/preprocess.py
@@ -0,0 +1,705 @@
+####
+# title: preprocess.py
+#
+# language: Python3.6
+# date: 2019-06-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to prepare images and other inputs for guillaumes segmentation software
+####
+
+#libraries
+import pandas as pd
+import matplotlib as mpl
+mpl.use('agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import skimage
+import shutil
+import re
+
+#set src path (CHANGE ME)
+s_src_path = '/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF'
+s_work_path = '/home/groups/graylab_share/Chin_Lab/ChinData/Work/engje'
+
+# function
+# import importlib
+# importlib.reload(preprocess)
+
+def check_names(df_img,s_type='tiff'):
+    """
+    (CHANGE ME)
+    Based on filenames in segment folder, 
+    checks marker names against standard list of biomarkers
+    returns a dataframe with Rounds Cycles Info, and sets of wrong and correct names
+    Input: s_find = string that will be unique to one scene to check in the folder
+    """
+    if s_type == 'tiff':
+        es_names = set(df_img.marker)
+    elif s_type == 'czi':
+        lls_marker =  [item.split('.') for item  in df_img.markers]
+        es_names =  set([item for sublist in lls_marker for item in sublist])
+    else :
+        print('Unknown type')
+    es_standard = {'DAPI','PDL1','pERK','CK19','pHH3','CK14','Ki67','Ecad','PCNA','HER2','ER','CD44',
+        'aSMA','AR','pAKT','LamAC','CK5','EGFR','pRB','FoxP3','CK7','PDPN','CD4','PgR','Vim',
+        'CD8','CD31','CD45','panCK','CD68','PD1','CD20','CK8','cPARP','ColIV','ColI','CK17',
+        'H3K4','gH2AX','CD3','H3K27','53BP1','BCL2','GRNZB','LamB1','pS6RP','BAX','RAD51',
+        'R0c2','R0c3','R0c4','R0c5','R5Qc2','R5Qc3','R5Qc4','R5Qc5','R11Qc2','R11Qc3','R11Qc4','R11Qc5',
+        'R7Qc2','R7Qc3','R7Qc4','R7Qc5','PDL1ab','PDL1d','R14Qc2','R14Qc3','R14Qc4','R14Qc5',
+        'R8Qc2','R8Qc3','R8Qc4','R8Qc5','R12Qc2','R12Qc3','R12Qc4','R12Qc5','PgRc4','R1c2','CCND1',
+        'Glut1','CoxIV','LamB2','S100','BMP4','BMP2','BMP6','pS62MYC', 'CGA', 'p63', 'SYP','PDGFRa', 'HIF1a','CC3',
+        'MUC1','CAV1','MSH2','CSF1R','R13Qc4', 'R13Qc5', 'R13Qc3', 'R13Qc2','R10Qc2','R10Qc3','R10Qc4','R10Qc5',
+        'R6Qc2', 'R6Qc3','R6Qc4', 'R6Qc5', 'TUBB3', 'CD90', 'GATA3'}#,'PDGFRB'CD66b (Neutrophils)
+        #HLA class II or CD21(Dendritic cells)
+        #BMP4	Fibronectin, CD11b (dendritic, macrophage/monocyte/granulocyte)	CD163 (macrophages)
+        #CD83 (dendritic cells)	FAP	
+    es_wrong = es_names - es_standard
+    es_right = es_standard.intersection(es_names)
+    print(f'Wrong names {es_wrong}')
+    print(f' Right names {es_right}')
+    return(es_wrong)
+
+def copy_dapis(s_r_old='-R11_',s_r_new='-R91_',s_c_old='_c1_',s_c_new='_c2_',s_find='_c1_ORG.tif',b_test=True,type='org'):
+    """
+    copy specified round of dapi, rename with new round and color
+    Input:
+    s_r_old = old round
+    s_r_new = new round on copied DAPI
+    s_c_old = old color
+    s_c_new = new color on copied DAPI
+    s_find= how to identify dapis i.e. '_c1_ORG.tif'
+    b_test=True if testing only
+    """
+    i_dapi = re.sub("[^0-9]", "", s_r_old)
+    ls_test = []
+    for s_file in os.listdir():
+            if s_file.find(s_find) > -1:
+                if s_file.find(s_r_old) > -1:
+                    s_file_round = s_file.replace(s_r_old,s_r_new)
+                    s_file_color = s_file_round.replace(s_c_old,s_c_new)
+                    if type=='org':
+                        s_file_dapi = s_file_color.replace(s_file_color.split("_")[1],f'DAPI{i_dapi}.DAPI{i_dapi}.DAPI{i_dapi}.DAPI{i_dapi}')
+                    else:
+                        s_file_dapi=s_file_color
+                    ls_test = ls_test + [s_file]
+                    if b_test:
+                        print(f'copied file {s_file} \t and named {s_file_dapi}')
+                    else:
+                        print(f'copied file {s_file} \t and named {s_file_dapi}')
+                        shutil.copyfile(s_file, s_file_dapi)
+    
+    print(f'total number of files changed is {len(ls_test)}')
+
+def copy_markers(df_img, s_original = 'panCK', ls_copy = ['CK19','CK5','CK7','CK14'],i_last_round = 97, b_test=True, type = 'org'):
+    """
+    For every image of marker s_original, make one copy per entry of ls_copy,
+    each named with a new round number and the copy's marker name.
+    Input:
+    df_img = dataframe with images; index = file name, columns `marker` and `rounds` are read
+    s_original = marker to copy
+    ls_copy = list of fake channels to make
+    i_last_round = copies get round numbers i_last_round+1, i_last_round+2, ...
+    b_test = True only prints what would be copied, False also copies the files
+    type = 'org' replaces the second '_'-separated field of the name with
+        <copy>.<copy>.<copy>.<copy>; otherwise '_<s_original>_' is replaced by '_<copy>_'
+    Output: None (prints each copy and the total count)
+    """
+    df_copy = df_img[df_img.marker==s_original]
+    ls_test = []
+    for s_index in df_copy.index:
+            s_round = df_img.loc[s_index,'rounds']
+            for idx, s_copy in enumerate(ls_copy):
+                #each fake channel gets its own round number after the last real round
+                i_round = i_last_round + 1 + idx
+                s_round = df_img.loc[s_index,'rounds']
+                #split the round string into its digits and its non-digit prefix
+                s_roundnum = re.sub("[^0-9]", "", s_round)
+                s_round_pre = s_round.replace(s_roundnum,'')
+                s_file_round = s_index.replace(df_img.loc[s_index,'rounds'],f'{s_round_pre}{i_round}')
+                # NOTE(review): this searches for '_<round>_' (not a color such as '_c2_')
+                # and would write '_c<round number>_'; after the round was already replaced
+                # above it may never match - confirm the intended color renaming
+                s_file_color = s_file_round.replace(f'_{s_round}_',f'_c{i_round}_')
+                if type == 'org':
+                    s_file_dapi = s_file_color.replace(s_file_color.split("_")[1],f'{s_copy}.{s_copy}.{s_copy}.{s_copy}')
+                else:
+                    s_file_dapi = s_file_color.replace(f'_{s_original}_',f'_{s_copy}_')
+                ls_test = ls_test + [s_index]
+                if b_test:
+                    print(f'copied file {s_index} \t and named {s_file_dapi}')
+                else:
+                    print(f'copied file {s_index} \t and named {s_file_dapi}')
+                    #s_index is used as the source path, relative to the current working directory
+                    shutil.copyfile(s_index, s_file_dapi)
+    print(f'total number of files changed is {len(ls_test)}')
+
+def dchange_fname(d_rename={'_oldstring_':'_newstring_'},b_test=True):
+    """
+    replace anything in file name, based on dictionary of key = old
+    values = new
+    Input
+    """
+    #d_rename = {'Registered-R11_CD34.AR.':'Registered-R11_CD34.ARcst.','FoxP3b':'FoxP3bio'}
+    for s_key,s_value in d_rename.items():
+        s_old=s_key
+        s_new=s_value
+        #test
+        if b_test:
+            ls_test = []
+            for s_file in os.listdir():
+                if s_file.find(s_old) > -1:
+                    s_file_print = s_file
+                    ls_test = ls_test + [s_file]
+                    len(ls_test)
+                    s_file_new = s_file.replace(s_old,s_new)
+                    #print(f'changed file {s_file}\tto {s_file_new}')
+            if len(ls_test)!=0:
+                print(f'changed file {s_file_print}\tto {s_file_new}')
+            print(f'total number of files changed is {len(ls_test)}')
+        #really rename
+        else:
+            ls_test = []
+            for s_file in os.listdir():
+                if s_file.find(s_old) > -1:
+                    s_file_print = s_file
+                    ls_test = ls_test + [s_file]
+                    len(ls_test)
+                    s_file_new = s_file.replace(s_old,s_new)
+                    #print(f'changed file {s_file}\tto {s_file_new}')
+                    os.rename(s_file, s_file_new) #comment out this line to test
+            if len(ls_test)!=0:
+                print(f'changed file {s_file_print}\tto {s_file_new}')
+            print(f'total number of files changed is {len(ls_test)}')
+
+def csv_change_fname(i_scene_len=2, b_test=True):
+    '''
+    give a csv with wrong_round and correct scene names
+    make a Renamed folder
+    the correct scene is added after, as +correct
+    '''
+    df_test = pd.read_csv(f'FinalSceneNumbers.csv',header=0)
+    df_test = df_test.astype(str)#(works!)
+    if i_scene_len == 2:
+        df_scene = df_test.applymap('{:0>2}'.format)
+    elif i_scene_len == 3:
+        df_test.replace('nan','',inplace=True)
+        df_test.replace(to_replace = "\.0+$",value = "", regex = True,inplace=True)
+        df_scene = df_test.applymap('{:0>3}'.format)
+    else:
+        df_scene = df_test #.applymap('{:0>3}'.format)
+    #for each round with wrong names
+    for s_wrong in  df_scene.columns[df_scene.columns.str.contains('wrong')]:
+        for s_file in os.listdir():
+            #find files in that round
+            if s_file.find(f'R{s_wrong.split("_")[1]}_') > -1:
+                #print(s_file)
+                #for each scene
+                for s_index in df_scene.index:
+                    s_wrong_scene = df_scene.loc[s_index,s_wrong]
+                    if s_file.find(f'-Scene-{s_wrong_scene}') > -1:
+                        s_correct = df_scene.loc[s_index,'correct']
+                        print(s_correct)
+                        s_replace = s_file.replace(f'-Scene-{s_wrong_scene}', f'-Scene-{s_wrong_scene}+{s_correct}')
+                        s_file_new = f"./Renamed/{s_replace}"
+                        
+                        if b_test:
+                            print(f'changed file {s_file} to {s_file_new}')
+                        else:
+                            os.rename(s_file, s_file_new)
+                            print(f'changed file {s_file} to {s_file_new}')
+    return(df_test)
+
+def check_seg_markers(df_img,d_segment = {'CK19':1002,'CK5':5002,'CD45':2002,'Ecad':802,'CD44':1202,'CK7':2002,'CK14':502}, i_rows=1, t_figsize=(20,10)):
+    """
+    This script makes binarizedoverviews of all the specified segmentation markers
+    with specified thresholds, and outputs a rounds cycles table
+    Input: df_dapi: output of mpimage.parse_org()
+     d_segment: segmentation marker names and thresholds
+     i_rows = number or rows in figure
+     t_figsize = (x, y) in inches size of figure
+    Output: dictionary
+    """
+    d_result = {}
+    for s_key,i_item in d_segment.items():
+        #find all segmentation marker slides
+        df_img_seg = df_img[df_img.marker==s_key]
+        fig,ax = plt.subplots(i_rows,(len(df_img_seg)+(i_rows-1))//i_rows, figsize = t_figsize, squeeze=False)
+        ax = ax.ravel()
+        for idx,s_scene in enumerate(sorted(df_img_seg.index.tolist())):
+            print(f'Processing {s_scene}')
+            im_low = skimage.io.imread(s_scene)
+            im = skimage.exposure.rescale_intensity(im_low,in_range=(i_item,i_item+1))
+            ax[idx].imshow(im, cmap='gray')
+            s_round = s_scene.split('Scene')[1].split('_')[0]
+            ax[idx].set_title(f'{s_key} Scene{s_round} min={i_item}',{'fontsize':12})
+        plt.tight_layout()
+        d_result.update({s_key:fig})
+    return(d_result)
+
+def checkall_seg_markers(df_img,d_segment = {'CK19':1002,'CK5':5002,'CD45':2002,'Ecad':802,'CD44':1202,'CK7':2002,'CK14':502}, i_rows=2, t_figsize=(15,10)):
+    """
+    This script makes binarizedoverviews of all the specified segmentation markers
+    with specified thresholds, and it puts all segmentation markers in one figure
+    Input: df_dapi: output of mpimage.parse_org()
+     d_segment: segmentation marker names and thresholds
+     i_rows = number or rows in figure
+     t_figsize = (x, y) in inches size of figure
+    Output: dictionary
+    """
+    es_seg = set([s_key for s_key,i_item in d_segment.items()])
+    df_img_seg = df_img[df_img.marker.isin(es_seg)]
+    fig,ax = plt.subplots(i_rows,(len(es_seg)+(i_rows-1))//i_rows, figsize = t_figsize, squeeze=False)
+    ax = ax.ravel()
+    for idx,s_scene in enumerate(sorted(df_img_seg.index.tolist())):
+            s_key = df_img.loc[s_scene].marker
+            i_item = d_segment[s_key]
+            print(f'Processing {s_scene}')
+            im_low = skimage.io.imread(s_scene)
+            im = skimage.exposure.rescale_intensity(im_low,in_range=(i_item,i_item+1))
+            ax[idx].imshow(im, cmap='gray')
+            s_round = s_scene.split('Scene')[1].split('_')[0]
+            ax[idx].set_title(f'{s_key} Scene{s_round} min={i_item}',{'fontsize':12})
+    plt.tight_layout()
+        #d_result.update({s_key:fig})
+    return(fig)
+
+def rounds_cycles(s_find='-Scene-001_c', d_segment = {'CK19':1002,'CK5':5002,'CD45':4502,'Ecad':802,'CD44':1202,'CK7':2002,'CK14':502}):
+    """
+    Based on filenames in segment folder, makes a dataframe with Rounds Cycles Info
+    """
+    ls_marker = []
+    df_dapi = pd.DataFrame() #(columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+    for s_name in sorted(os.listdir()):
+        if s_name.find(s_find) > -1:
+            s_color = s_name.split('_')[3]
+            if s_color != 'c1':
+                #print(s_name)
+                if s_color == 'c2':
+                    s_marker = s_name.split('_')[1].split('.')[0]
+                elif s_color == 'c3':
+                    s_marker = s_name.split('_')[1].split('.')[1]
+                elif s_color == 'c4':
+                    s_marker = s_name.split('_')[1].split('.')[2]
+                elif s_color == 'c5':
+                    s_marker = s_name.split('_')[1].split('.')[3]
+                else: 
+                    print('Error: unrecognized channel name')
+                    s_marker = 'error'
+                ls_marker.append(s_marker)
+                df_marker = pd.DataFrame(index = [s_marker],columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+                df_marker.loc[s_marker,'rounds'] = s_name.split('_')[0].split('Registered-')[1]
+                df_marker.loc[s_marker,'colors'] = s_name.split('_')[3]
+                df_marker.loc[s_marker,'minimum'] = 1003
+                df_marker.loc[s_marker,'maximum'] = 65535
+                df_marker.loc[s_marker,'exposure'] = 100
+                df_marker.loc[s_marker,'refexp'] = 100
+                df_marker.loc[s_marker,'location'] = 'All'
+                df_dapi = df_dapi.append(df_marker)
+    for s_key,i_item in d_segment.items():
+        df_dapi.loc[s_key,'minimum'] = i_item
+    #if len(ls_marker) != len(set(df_marker.index)):
+    #    print('Check for repeated biomarkers!')
+    for s_marker in ls_marker:
+        if (np.array([s_marker == item for item in ls_marker]).sum()) != 1:
+            print('Repeated marker!/n')
+            print(s_marker)
+
+    return(df_dapi, ls_marker)
+
+def cluster_java(s_dir='JE1',s_sample='SampleID',imagedir='PathtoImages',segmentdir='PathtoSegmentation',type='exacloud',b_segment=True,b_TMA=True):
+    """
+    makes specific changes to files in Jenny's Work directories to result in Cluster.java file
+    s_dir = directory to make cluster.java file in
+    s_sample = unique sample ID
+    imagedir = full /path/to/images
+    type = 'exacloud' or 'eppec' (different make file settings)
+    b_TMA = True if tissue is a TMA
+    b_segment = True if segmentation if being done (or False if feature extraction only)
+    """
+    if type=='exacloud':
+        os.chdir(f'{s_work_path}/exacloud/')
+        with open('TemplateExacloudCluster.java') as f:
+            s_file = f.read()
+    elif type=='eppec':
+        os.chdir(f'{s_work_path}/eppec/')
+        with open('TemplateEppecCluster.java') as f:
+            s_file = f.read()
+    else:
+        print('Error: type must be exacloud or eppec')
+    s_file = s_file.replace('PathtoImages',imagedir)
+    s_file = s_file.replace('PathtoSegmentation',f'{segmentdir}/{s_sample.split("-Scene")[0]}_Segmentation/')
+    s_file = s_file.replace('PathtoFeatures',f'{segmentdir}/{s_sample.split("-Scene")[0]}_Features/')
+    if b_segment:
+        s_file = s_file.replace('/*cif.Experiment','cif.Experiment')
+        s_file = s_file.replace('("Segmentation Done!") ;*/','("Segmentation Done!") ;')
+    if b_TMA:
+        s_file = s_file.replace('cif.CROPS ;','cif.TMA ;')
+    os.chdir(f'./{s_dir}/')
+    with open('Cluster.java', 'w') as f:
+        f.write(s_file)
+
+def registration_matlab(N_smpl='10000',N_colors='5',s_rootdir='PathtoImages',s_subdirname='RegisteredImages/',s_ref_id='./R1_*_c1_ORG.tif',
+    ls_order = ['R1','R2','R3','R4','R5','R6','R7','R8','R9','R10','R11','R0','R11Q']):
+
+    """
+    makes specific changes to template matlab scripts files in Jenny's directories to result in .m file
+    Input:
+    N_smpl = i_N_smpl; %number of features to detect in image (default = 10000)
+    N_colors = i_N_colors; %number of colors in R1 (default = 5)
+    ls_order = {RoundOrderString}; %list of names and order of rounds
+    s_rootdir = 'PathtoImages' %location of raw images in folder
+    s_ref_id = 'RefDapiUniqueID'; %shared unique identifier of reference dapi
+    s_subdirname = 'PathtoRegisteredImages' %location of folder where registered images will reside
+    """
+    ls_order_q = [f"'{item}'" for item in ls_order]
+    #find template, open ,edit
+    os.chdir(f'{s_src_path}/src')
+    with open('template_registration_server_multislide_roundorder_scenes_2019_11_11.m') as f:
+            s_file = f.read()
+    s_file = s_file.replace('PathtoImages',s_rootdir)
+    s_file = s_file.replace('PathtoRegisteredImages',s_subdirname)
+    s_file = s_file.replace('i_N_smpl',N_smpl)
+    s_file = s_file.replace('i_N_colors',N_colors)
+    s_file = s_file.replace("RoundOrderString",",".join(ls_order_q))
+    s_file = s_file.replace('RefDapiUniqueID',s_ref_id)
+
+    #save edited .m file
+    os.chdir(s_rootdir)
+    with open('registration_py.m', 'w') as f:
+        f.write(s_file)
+
+def large_registration_matlab(N_smpl='10000',N_colors='5',s_rootdir='PathtoImages',s_subdirname='RegisteredImages',s_ref_id='./R1_*_c1_ORG.tif',
+     ls_order = ['R1','R2','R3','R4','R5','R6','R7','R8','R9','R10','R11','R0','R11Q'],d_crop_regions={1:'[0 0 1000 1000]'}):
+    """
+    makes specific changes to template matlab scripts files in Jenny's directories to result in .m file
+    Input:
+    N_smpl = i_N_smpl; %number of features to detect in image (default = 10000)
+    N_colors = i_N_colors; %number of colors in R1 (default = 5)
+    ls_order = {RoundOrderString}; %list of names and order of rounds
+    s_rootdir = 'PathtoImages' %location of raw images in folder
+    s_ref_id = 'RefDapiUniqueID'; %shared unique identifier of reference dapi
+    s_subdirname = 'PathtoRegisteredImages' %location of folder where registered images will reside
+    d_crop_regions= dictioanr with crop integer as key, ans string with crop array as value e.g. {1:'[0 0 1000 1000]'}
+
+    """
+    ls_order_q = [f"'{item}'" for item in ls_order]
+
+    os.chdir(f'{s_src_path}/src')
+    with open('template_registration_server_largeimages_roundorder_2019_11_11.m') as f:
+        s_file = f.read()
+    s_file = s_file.replace('PathtoImages',s_rootdir)
+    s_file = s_file.replace('PathtoRegisteredImages',s_subdirname)
+    s_file = s_file.replace('i_N_smpl',N_smpl)
+    s_file = s_file.replace('i_N_colors',N_colors)
+    s_file = s_file.replace("RoundOrderString",",".join(ls_order_q))
+    s_file = s_file.replace('RefDapiUniqueID',s_ref_id)
+
+    for i_crop_region, s_crop in d_crop_regions.items():
+        s_file = s_file.replace(f'%{i_crop_region}%{i_crop_region}%','')
+        s_file = s_file.replace(f'[a_crop_{i_crop_region}]',s_crop)
+    #save edited .m file
+    os.chdir(s_rootdir)
+    with open('registration_py.m', 'w') as f:
+        f.write(s_file)
+
+def cmif_mkdir(ls_dir):
+    '''
+    check if directories existe. if not, make them
+    '''
+    for s_dir in ls_dir:
+        if not os.path.exists(s_dir):
+            os.makedirs(s_dir)
+
+######################### Old functions ############################
+
+def check_reg_channels(ls_find=['c1_ORG','c2_ORG'], i_rows=2, t_figsize=(20,10), b_separate = False, b_mkdir=True):
+    """
+    This script makes overviews of all the specified channel images of registered tiff images
+    in a big folder (slides prepared for segmentation for example)
+    Input: ls_find = list of channels to view
+     i_rows = number or rows in figure
+     t_figsize = (x, y) in inches size of figure
+     b_mkdir = boolean whether to make a new Check_Registration folder
+    Output: dictionary with {slide_color:number of rounds found}
+     images of all rounds of a certain slide_color
+    """
+    d_result = {}
+    ls_error = []
+    if b_separate:
+        s_dir = os.getcwd()
+        os.chdir('..')
+        s_path = os.getcwd()
+        if b_mkdir:
+            os.mkdir(f'./Check_Registration')
+        os.chdir(s_dir)
+    else:
+        s_path = os.getcwd()
+        if b_mkdir:
+            os.mkdir(f'./Check_Registration')
+    for s_find in ls_find:
+        #find all dapi slides
+        ls_dapis = []
+        for s_dir in os.listdir():
+            if s_dir.find(s_find) > -1:
+                ls_dapis = ls_dapis + [s_dir]
+        
+        #find all unique scenes
+        ls_scene_long = []
+        for s_dapi in ls_dapis:
+            ls_scene_long = ls_scene_long + [(s_dapi.split('_')[2])]
+        ls_scene = list(set(ls_scene_long))
+        ls_scene.sort()
+
+        for s_scene in ls_scene:
+            print(f'Processing {s_scene}')
+            ls_dapi = []
+            for s_file in ls_dapis:
+                if s_file.find(s_scene)>-1:
+                    ls_dapi = ls_dapi + [s_file]
+            fig,ax = plt.subplots(i_rows,(len(ls_dapi)+(i_rows-1))//i_rows, figsize = t_figsize)
+            ax = ax.ravel()
+            ls_dapi.sort()
+            for x in range(len(ls_dapi)):
+                im_low = skimage.io.imread(ls_dapi[x])
+                im = skimage.exposure.rescale_intensity(im_low,in_range=(np.quantile(im_low,0.02),np.quantile(im_low,0.98)+np.quantile(im_low,0.98)/2))
+                ax[x].imshow(im, cmap='gray')
+                s_round = ls_dapi[x].split('_')[0].split('-')[1]
+                ax[x].set_title(s_round,{'fontsize':12})
+            s_slide = ls_dapi[0].split('_')[2]
+            plt.tight_layout()
+            fig.savefig(f'{s_path}/Check_Registration/{s_slide}_{s_find}.png')
+            d_result.update({f'{s_slide}_{s_find}':len(ls_dapi)})
+            ls_error = ls_error + [len(ls_dapi)]
+    if(len(set(ls_error))==1):
+        print("All checked scenes/channels have the same number of images")
+    else:
+        print("Warning: different number of images in some scenes/channels")
+        for s_key, i_item in d_result.items():
+            print(f'{s_key} has {i_item} images')
+    return(d_result)
+	
+
+def check_names_deprecated(s_find='-Scene-001_c',b_print=False):
+    """
+    Based on filenames in segment folder, 
+    checks marker names against standard list of biomarkers
+    returns a dataframe with Rounds Cycles Info, and sets of wrong and correct names
+    Input: s_find = string that will be unique to one scene to check in the folder
+    """
+    df_dapi = pd.DataFrame() #(columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+    for s_name in sorted(os.listdir()):
+        if s_name.find(s_find) > -1:
+            s_color = s_name.split('_')[3]
+            if s_color != 'c1':
+                if b_print:
+                    print(s_name)
+                if s_color == 'c2':
+                    s_marker = s_name.split('_')[1].split('.')[0]
+                elif s_color == 'c3':
+                    s_marker = s_name.split('_')[1].split('.')[1]
+                elif s_color == 'c4':
+                    s_marker = s_name.split('_')[1].split('.')[2]
+                elif s_color == 'c5':
+                    s_marker = s_name.split('_')[1].split('.')[3]
+                else: 
+                    print('Error: unrecognized channel name')
+                    s_marker = 'error'
+                df_marker = pd.DataFrame(index = [s_marker],columns=['rounds','colors','minimum','maximum','exposure','refexp','location'])
+                df_marker.loc[s_marker,'rounds'] = s_name.split('_')[0].split('Registered-')[1]
+                df_marker.loc[s_marker,'colors'] = s_name.split('_')[3]
+                df_marker.loc[s_marker,'minimum'] = 1003
+                df_marker.loc[s_marker,'maximum'] = 65535
+                df_marker.loc[s_marker,'exposure'] = 100
+                df_marker.loc[s_marker,'refexp'] = 100
+                df_marker.loc[s_marker,'location'] = 'All'
+                df_dapi = df_dapi.append(df_marker)
+    es_names = set(df_dapi.index)
+    es_standard = {'PDL1','pERK','CK19','pHH3','CK14','Ki67','Ecad','PCNA','HER2','ER','CD44',
+        'aSMA','AR','pAKT','LamAC','CK5','EGFR','pRB','FoxP3','CK7','PDPN','CD4','PgR','Vim',
+        'CD8','CD31','CD45','panCK','CD68','PD1','CD20','CK8','cPARP','ColIV','ColI','CK17',
+        'H3K4','gH2AX','CD3','H3K27','53BP1','BCL2','GRNZB','LamB1','pS6RP','BAX','RAD51',
+        'R0c2','R0c3','R0c4','R0c5','R5Qc2','R5Qc3','R5Qc4','R5Qc5','R11Qc2','R11Qc3','R11Qc4','R11Qc5',
+        'R7Qc2','R7Qc3','R7Qc4','R7Qc5','PDL1ab','PDL1d','R14Qc2','R14Qc3','R14Qc4','R14Qc5',
+        'R8Qc2','R8Qc3','R8Qc4','R8Qc5','R12Qc2','R12Qc3','R12Qc4','R12Qc5','PgRc4',
+        'Glut1','CoxIV','LamB2','S100','BMP4','BMP2','BMP6','pS62MYC', 'CGA', 'p63', 'SYP','PDGFRa', 'HIF1a'}#,'PDGFRB'CD66b (Neutrophils)	HLA class II or CD21(Dendritic cells)
+        #BMP4	Fibronectin, CD11b (dendritic, macrophage/monocyte/granulocyte)	CD163 (macrophages)
+        #CD83 (dendritic cells)	FAP	Muc1
+    es_wrong = es_names - es_standard
+    es_right = es_standard.intersection(es_names)
+    print(f'Wrong names {es_wrong}')
+    print(f' Right names {es_right}')
+    return(df_dapi, es_wrong, es_right)
+
+def file_sort(s_sample, s_path, i_scenes=14,i_rounds=12,i_digits=3,ls_quench=['R5Q','R11Q'],s_find='_ORG.tif',b_scene=False):
+    '''
+    Print counts of image files per round, quench round, scene (optional) and color
+    (koeis naming convention, not registered yet).
+
+    s_sample = label that is printed at the top of the report
+    s_path = folder with the images; the current working directory is changed to it
+    i_scenes = scenes 1..i_scenes are counted (only if b_scene)
+    i_rounds = rounds R0 .. R<i_rounds-1> are counted
+    i_digits = number of digits (1 to 3) of the zero padded scene number in the file name
+    ls_quench = list of quench round names that are counted separately
+    s_find = only file names containing this string are counted
+    b_scene = True also prints the number of files per scene
+    returns None; d_result (scenes found per round) is built at the end but not returned
+    '''
+    os.chdir(s_path)
+    se_dir = pd.Series(os.listdir())
+
+    #keep matching files, sorted; after reset_index/drop this is a dataframe
+    #with a single column named 0 that holds the file names
+    se_dir = se_dir[se_dir.str.find(s_find)>-1]
+    se_dir = se_dir.sort_values()
+    se_dir = se_dir.reset_index()
+    se_dir = se_dir.drop('index',axis=1)
+
+    print(s_sample)
+    print(f'Total _ORG.tif: {len(se_dir)}')
+
+    #count files in each round, plus store file names on df_round
+    # NOTE(review): 540 looks like an assumed upper bound of files per round - confirm
+    df_round = pd.DataFrame(index=range(540))
+    i_grand_tot = 0
+    for x in range(i_rounds):
+        se_round = se_dir[se_dir.iloc[:,0].str.contains(f'R{str(x)}_')]
+        se_round = se_round.rename({0:'round'},axis=1)
+        se_round = se_round.sort_values(by='round')
+        se_round = se_round.reset_index()
+        se_round = se_round.drop('index',axis=1)
+        i_tot = se_dir.iloc[:,0].str.contains(f'R{str(x)}_').sum()
+        i_round = 'Round ' + str(x)
+        print(f'{i_round}: {i_tot}')
+        i_grand_tot = i_grand_tot + i_tot
+        #se_round is a one column dataframe here, stored as column 'Round <x>'
+        df_round[i_round]=se_round
+    #keeps only the rows where every round column has a file name
+    df_round = df_round.dropna()    
+
+    #quenched round special loop
+    for s_quench in ls_quench:
+        #x = "{0:0>2}".format(x)
+        i_tot = se_dir.iloc[:,0].str.contains(s_quench).sum()
+        #i_round = 'Round ' + str(x)
+        print(f'{s_quench}: {i_tot}')
+        i_grand_tot = i_grand_tot + i_tot 
+    print(f'Total files containing Rxx_: {i_grand_tot}')
+    
+    if b_scene:
+        #print number of files in each scene
+        for x in range(1,i_scenes+1):
+            if i_digits==3:
+                i_scene = "{0:0>3}".format(x)
+            elif i_digits==2:
+                i_scene = "{0:0>2}".format(x)
+            elif i_digits==1:
+                i_scene = "{0:0>1}".format(x)
+            else:
+                #i_scene is not set in this branch; the line below then fails with NameError
+                print('wrong i_digits input (must be between 1 and 3')
+            i_tot = se_dir.iloc[:,0].str.contains(f'Scene-{i_scene}_').sum()
+            i_round = 'Scene ' + str(x)
+            print(f'{i_round}: {i_tot}')
+
+    #print number of files in each color
+    for x in range(1,6):
+        #i_scene = "{0:0>2}".format(x)
+        i_tot = se_dir.iloc[:,0].str.contains(f'_c{str(x)}_ORG').sum()
+        i_round = 'color ' + str(x)
+        print(f'{i_round}: {i_tot}')
+
+    #set of scene numbers found in each round (built but not returned)
+    d_result = {}	
+    for s_round in df_round.columns:
+        es_round = set([item.split('-Scene-')[1].split('_')[0] for item in list(df_round.loc[:,s_round].values)])
+        d_result.update({s_round:es_round})
+    print('\n')
+
+
+def change_fname(s_old='_oldstring_',s_new='_newstring_',b_test=True):
+    """
+    replace anything in file name
+    """
+    if b_test:
+        ls_test = []
+        for s_file in os.listdir():
+            if s_file.find(s_old) > -1:
+                ls_test = ls_test + [s_file]
+                len(ls_test)
+                s_file_new = s_file.replace(s_old,s_new)
+                print(f'changed file {s_file}\tto {s_file_new}')
+
+        print(f'total number of files changed is {len(ls_test)}')
+    #really rename
+    else:
+        ls_test = []
+        for s_file in os.listdir():
+            if s_file.find(s_old) > -1:
+                ls_test = ls_test + [s_file]
+                len(ls_test)
+                s_file_new = s_file.replace(s_old,s_new)
+                print(f'changed file {s_file}\tto {s_file_new}')
+                os.rename(s_file, s_file_new) #comment out this line to test
+        print(f'total number of files changed is {len(ls_test)}')
+
+def check_reg_slides(i_rows=2, t_figsize=(20,10), b_mkdir=True):
+    """
+    This script makes overviews of all the dapi images of registered images in a big folder (slides prepared for segmentation for example)
+    """
+    #find all dapi slides
+    ls_dapis = []
+    for s_dir in os.listdir():
+        if s_dir.find('c1_ORG') > -1:
+            ls_dapis = ls_dapis + [s_dir]
+
+    #find all scenes
+    ls_scene_long = []
+    for s_dapi in ls_dapis:
+        ls_scene_long = ls_scene_long + [(s_dapi.split('Scene')[1].split('_')[0])]
+    ls_scene = list(set(ls_scene_long))
+    ls_scene.sort()
+    if b_mkdir:
+        os.mkdir(f'./Check_Registration')
+    for s_scene in ls_scene:
+        print(f'Processing {s_scene}')
+        ls_dapi = []
+        for s_file in ls_dapis:
+            if s_file.find(f'Scene{s_scene}')>-1:
+                ls_dapi = ls_dapi + [s_file]
+        fig,ax = plt.subplots(i_rows,(len(ls_dapi)+(i_rows-1))//i_rows, figsize = t_figsize)
+        ax = ax.ravel()
+        ls_dapi.sort()
+        for x in range(len(ls_dapi)):
+            im_low = skimage.io.imread(ls_dapi[x])
+            im = skimage.exposure.rescale_intensity(im_low,in_range=(np.quantile(im_low,0.02),np.quantile(im_low,0.98)+np.quantile(im_low,0.98)/2))
+            ax[x].imshow(im, cmap='gray')
+            s_round = ls_dapi[x].split('_')[0].split('-')[1]
+            ax[x].set_title(s_round,{'fontsize':12})
+        s_slide = ls_dapi[0].split('_')[2]
+        plt.tight_layout()
+        fig.savefig(f'Check_Registration/{s_slide}.png')
+
+def check_reg_dirs(s_dir='SlideName',s_subdir='Registered-SlideName', i_rows=2, t_figsize=(20,10), b_mkdir=True):
+    """
+    this checks registration when files are in subdirectories (such as with large tissues, i.e. NP005)
+    """
+
+    rootdir = os.getcwd()
+    if b_mkdir:
+        os.mkdir(f'./Check_Registration')
+    #locate subdirectores
+    for s_dir in os.listdir():
+        if s_dir.find(s_dir) > -1:
+            os.chdir(f'./{s_dir}')
+
+            #locate registered image folders
+            for s_dir in os.listdir():
+            #for s_dir in ls_test2:
+                if s_dir.find(s_subdir) > -1:  #'Registered-BR1506-A019-Scene'
+                    print(f'Processing {s_dir}')
+                    ls_dapi = []
+                    os.chdir(f'./{s_dir}')
+                    ls_file = os.listdir()
+                    for s_file in ls_file:
+                        if s_file.find('_c1_ORG.tif')>-1:
+                            ls_dapi = ls_dapi + [s_file]
+                    fig,ax = plt.subplots(i_rows,(len(ls_dapi)+(i_rows-1))//i_rows, figsize = (t_figsize)) #vertical
+                    ax=ax.ravel()
+                    ls_dapi.sort()
+                    for x in range(len(ls_dapi)):
+                        im_low = skimage.io.imread(ls_dapi[x])
+                        im = skimage.exposure.rescale_intensity(im_low,in_range=(np.quantile(im_low,0.02),np.quantile(im_low,0.98)+np.quantile(im_low,0.98)/2))
+                        ax[x].imshow(im, cmap='gray')
+                        s_round = ls_dapi[x].split('_')[0].split('-')[1]
+                        s_scene = ls_dapi[x].split('-Scene')[1].split('_')[0]
+                        ax[x].set_title(f'{s_round} Scene{s_scene}',{'fontsize':12})
+                    plt.tight_layout()
+
+                    #save figure in the rootdir/Check_Registration folder
+                    fig.savefig(f'{rootdir}/Check_Registration/{s_dir}.png')
+            #go out of the subfoler and start next processing
+                os.chdir('..')
+
+def test(name="this_is_you_name"):
+    '''
+    This is my first doc string
+    '''
+    print(f'hello {name}')
+    return True
diff --git a/mplex_image/process.py b/mplex_image/process.py
new file mode 100755
index 0000000..9057580
--- /dev/null
+++ b/mplex_image/process.py
@@ -0,0 +1,1208 @@
+####
+# title: process.py
+#
+# language: Python3.6
+# date: 2019-05-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to process cyclic data and images after segmentation
+####
+
+#libraries
+import pandas as pd
+import matplotlib as mpl
+mpl.use('agg')
+import matplotlib.pyplot as plt
+import os
+import numpy as np
+import skimage
+import copy
+import re
+import seaborn as sns
+from PIL import Image
+Image.MAX_IMAGE_PIXELS = 1000000000
+
+#function cellpose
+def load_cellpose_df(ls_sample, segdir):
+    '''
+    load all full feature dataframes in sample list
+    '''
+    df_mi_full = pd.DataFrame()
+    for idx, s_sample in enumerate(ls_sample):
+        print(f'Loading features_{s_sample}_MeanIntensity_Centroid_Shape.csv')
+        df_tt = pd.read_csv(f'{segdir}/features_{s_sample}_MeanIntensity_Centroid_Shape.csv',index_col=0)
+        df_tt['slide'] = s_sample.split('-Scene')[0]
+        df_tt['scene'] = [item.split('_')[1] for item in df_tt.index]
+        df_mi_full = df_mi_full.append(df_tt,sort=True)
+    #add scene
+    df_mi_full['slide_scene'] = df_mi_full.slide + '_' + df_mi_full.scene
+    print('')
+    return(df_mi_full)
+
+# load li thresholds
+def load_li(ls_sample, s_thresh, man_thresh):
+    '''
+    load threshold on the segmentation marker images acquired during feature extraction
+    '''
+    df_img_all =pd.DataFrame()
+    for s_sample in ls_sample:
+        print(f'Loading thresh_{s_sample}_ThresholdLi.csv')
+        df_img = pd.read_csv(f'thresh_{s_sample}_ThresholdLi.csv', index_col=0)
+        df_img['rounds'] = [item.split('_')[0].split('Registered-')[1] for item in df_img.index]
+        df_img['color'] = [item.split('_')[-2] for item in df_img.index]
+        df_img['slide'] = [item.split('_')[2].split('-Scene-')[0] for item in df_img.index]
+        df_img['scene'] = [item.split('_')[2].split('-Scene-')[1] for item in df_img.index]
+        df_img['slide_scene'] = df_img.slide + '_scene' + df_img.scene
+        #parse file name for biomarker
+        for s_index in df_img.index:
+            #print(s_index)
+            s_color = df_img.loc[s_index,'color']
+            if s_color == 'c1':
+                s_marker = f"DAPI{df_img.loc[s_index,'rounds'].split('R')[1]}"
+            elif s_color == 'c2':
+                s_marker = s_index.split('_')[1].split('.')[0]
+            elif s_color == 'c3':
+                s_marker = s_index.split('_')[1].split('.')[1]
+            elif s_color == 'c4':
+                s_marker = s_index.split('_')[1].split('.')[2]
+            elif s_color == 'c5':
+                s_marker = s_index.split('_')[1].split('.')[3]
+            else: print('Error')
+            df_img.loc[s_index,'marker'] = s_marker
+        df_img_all = df_img_all.append(df_img)
+    print('')
+    #manually override too low Ecad thresh
+    if s_thresh !='':
+        df_img_all.loc[df_img_all[(df_img_all.marker==s_thresh) & (df_img_all.threshold_li < man_thresh)].index, 'threshold_li'] = man_thresh
+    return(df_img_all)
+
+def filter_cellpose_xy(df_mi_full,ls_centroid = ['DAPI2_nuclei_area', 'DAPI2_nuclei_centroid-0', 'DAPI2_nuclei_centroid-1','DAPI2_nuclei_eccentricity']):
+    '''
+    select the nuclei centoids, area, eccentricity from a marker 
+    default: use DAPI2
+    '''
+    #NOTE add area
+    df_xy = df_mi_full.loc[:,ls_centroid]
+    print('QC: make sure centroids dont have too many NAs')
+    print(df_xy.isna().sum())
+    print('')
+    df_xy = df_xy.dropna(axis=0,how='any')
+    df_xy.columns = ['nuclei_area','DAPI_Y','DAPI_X','nuclei_eccentricity']
+    df_xy['slide_scene'] = [item.split('_cell')[0] for item in df_xy.index]
+    return(df_xy)
+
+def drop_last_rounds(df_img_all,ls_filter,df_mi_full):
+    '''
+    drop any rounds after the last round DAPI filter 
+    '''
+    df_img_all['round_ord'] = [re.sub('Q','.5', item) for item in df_img_all.rounds] 
+    df_img_all['round_ord'] = [float(re.sub('[^0-9.]','', item)) for item in df_img_all.round_ord]
+    i_max = df_img_all[df_img_all.marker.isin([item.split('_')[0] for item in ls_filter])].sort_values('round_ord').iloc[-1].round_ord
+    print(f'Dropping markers after round {i_max}')
+    ls_drop_marker = [item + '_' for item in sorted(set(df_img_all[(df_img_all.round_ord>i_max)].marker))]
+    [print(item) for item in ls_drop_marker]
+    print('')
+    [df_mi_full.drop(df_mi_full.columns[df_mi_full.columns.str.contains(item)],axis=1,inplace=True) for item in ls_drop_marker]
+    return(df_mi_full,i_max)
+
+def plot_thresh(df_img_all,s_thresh):
+    '''
+    tissues: plot threshold across all tissues
+    (negative scenes will drive down the mean
+    '''
+    ls_slides = sorted(set(df_mi_full.slide))
+    df_plot = df_img_all[(df_img_all.marker==s_thresh)].loc[:,['threshold_li']]
+    fig,ax=plt.subplots(figsize=(4,3.5))
+    sns.stripplot(data=df_plot)
+    sns.barplot(data=df_plot, alpha=0.5)
+    labels = ax.get_xticklabels
+    plt.tight_layout()
+    fig.savefig(f'{qcdir}/QC_EcadThresh_{".".join(ls_slides)}.png')
+
+def fill_cellpose_nas(df_mi_full,ls_marker_cyto,s_thresh='Ecad',man_thresh=1000): 
+    '''
+    some nuclei don't have a cytoplasm, replace NA with perinuc5
+    '''
+    df = df_mi_full.copy(deep=True)
+    # since segmentation was run on ecad, use ecad threshold
+    print(f'Finding {s_thresh} positive cells')
+    ls_neg_cells = (df_mi_full[~(df_mi_full.loc[:,f'{s_thresh}_cytoplasm'] > man_thresh)]).index.tolist()#= ls_neg_cells + ls_neg_slide
+    print('')
+    # replace cells without cytoplasm (ecad) with perinuc 5
+    print(f'For cells that are {s_thresh} negative:')
+    for s_marker in ls_marker_cyto:
+        print(f'Replace  {s_marker}_cytoplasm nas')
+        df.loc[ls_neg_cells,f'{s_marker}_cytoplasm'] = df.loc[ls_neg_cells,f'{s_marker}_perinuc5'] 
+        print(f'with {s_marker}_perinuc5')
+    df[f'{s_thresh}_negative'] = df.index.isin(ls_neg_cells)
+    return(df)
+
+def shrink_seg_regions(df_mi_full,s_thresh,ls_celline=[],ls_shrunk=[]):
+    '''
+    For markers with stromal to tumor bleedthrough, use shrunken segmentation region
+    '''
+    #enforce cell lines as tumor
+    print('')
+    if len(ls_celline) > 0:
+        print([f'Enforce {item} as tumor' for item in ls_celline])
+    ls_ecad_cells = df_mi_full[~df_mi_full.loc[:,f'{s_thresh}_negative']].index
+    ls_tumor_cells = (df_mi_full[(df_mi_full.index.isin(ls_ecad_cells)) | (df_mi_full.slide_scene.isin(ls_celline))]).index
+    ls_stromal_cells = (df_mi_full[~df_mi_full.index.isin(ls_tumor_cells)]).index
+    #relplace tumor cell CD44 and Vim with shrunken area (only helps bleed trough a little)
+    print('For markers with stromal to tumor bleedthrough, use shrunken segmentation region:')
+    for s_marker in ls_shrunk:
+        print(f'Replace  {s_marker.split("_")[0]}_perinuc5 in tumor cells with')
+        df_mi_full.loc[ls_tumor_cells,f'{s_marker.split("_")[0]}_perinuc5'] = df_mi_full.loc[ls_tumor_cells,f'{s_marker}']
+        print(f'with {s_marker}')
+    print('')
+    return(df_mi_full)
+
+def fill_membrane_nas(df_mi_full, df_mi_mem,s_thresh='Ecad',ls_membrane=['HER2']):
+    '''
+    fill cell membrane nsa with expanded nuclei nas
+    '''
+    ls_neg = df_mi_full[(df_mi_full.loc[:,f'{s_thresh}_negative']) & (df_mi_full.index.isin(df_mi_mem.index))].index
+    ls_pos = df_mi_full[(~df_mi_full.loc[:,f'{s_thresh}_negative']) & (df_mi_full.index.isin(df_mi_mem.index))].index
+    for s_membrane in ls_membrane:
+        print(f'Replace {s_membrane}_cellmem25 nas \n with {s_membrane}_exp5nucmembrane25')
+        df_mi_mem.loc[ls_neg,f'{s_membrane}_cellmem25'] =  df_mi_mem.loc[ls_neg,f'{s_membrane}_exp5nucmembrane25']
+        ls_na = df_mi_mem.loc[df_mi_mem.loc[:,f'{s_membrane}_cellmem25'].isna(),:].index
+        df_mi_mem.loc[ls_na,f'{s_membrane}_cellmem25'] = df_mi_mem.loc[ls_na,f'{s_membrane}_exp5nucmembrane25']
+    df_merge = df_mi_full.merge(df_mi_mem, left_index=True, right_index=True)
+    print('')
+    return(df_merge)
+
+def fill_bright_nas(ls_membrane,s_sample,s_thresh,df_mi_filled,segdir):
+    if len(ls_membrane) > 0:
+        print(f'Loading features_{s_sample}_BrightMeanIntensity.csv')
+        df_mi_mem = pd.read_csv(f'{segdir}/features_{s_sample}_BrightMeanIntensity.csv',index_col=0)
+        df_mi_mem_fill = fill_membrane_nas(df_mi_filled, df_mi_mem,s_thresh=s_thresh,ls_membrane=ls_membrane)
+    else:
+        df_mi_mem_fill = df_mi_filled
+    return(df_mi_mem_fill)
+
+def auto_threshold(df_mi,df_img_all):
+    # # Auto threshold
+
+    #make positive dataframe to check threhsolds 
+    ls_scene = sorted(set(df_mi.slide_scene))
+
+    df_pos_auto = pd.DataFrame()
+    d_thresh_record= {}
+
+    for s_slide_scene in ls_scene:
+        print(f'Thresholding {s_slide_scene}')
+        ls_index = df_mi[df_mi.slide_scene==s_slide_scene].index
+        df_scene = pd.DataFrame(index=ls_index)
+        df_img_scene = df_img_all[df_img_all.slide_scene==s_slide_scene]
+
+        for s_index in df_img_scene.index:
+            s_scene =f"{df_img_all.loc[s_index,'slide']}_scene{df_img_all.loc[s_index,'scene']}"
+            s_marker = df_img_all.loc[s_index,'marker']
+            s_columns = df_mi.columns[df_mi.columns.str.contains(f"{s_marker}_")]
+            if len(s_columns)==1:
+                s_marker_loc = s_columns[0]
+            else:
+                continue
+            i_thresh = df_img_all.loc[s_index,'threshold_li']
+            d_thresh_record.update({f'{s_scene}_{s_marker}':i_thresh})
+            df_scene.loc[ls_index,s_marker_loc] = df_mi.loc[ls_index,s_marker_loc] >= i_thresh
+        df_pos_auto = df_pos_auto.append(df_scene)
+    return(df_pos_auto,d_thresh_record)
+
+def positive_scatterplots(df_pos_auto,d_thresh_record,df_xy,ls_color,qcdir='.'):
+    '''
+    for marker in ls_color, plot positive cells location in tissue
+    '''
+    ls_scene = sorted(set(df_xy.slide_scene))
+
+    for s_scene in ls_scene:
+        print(f'Plotting {s_scene}')
+        #negative cells = all cells even before dapi filtering
+        df_neg = df_xy[(df_xy.slide_scene==s_scene)]
+        #plot
+        fig, ax = plt.subplots(2, ((len(ls_color))+1)//2, figsize=(18,12)) #figsize=(18,12)
+        ax = ax.ravel()
+        for ax_num, s_color in enumerate(ls_color):
+            s_marker = s_color.split('_')[0]
+            s_min = d_thresh_record[f"{s_scene}_{s_marker}"]
+            #positive cells = positive cells based on threshold
+            ls_pos_index = (df_pos_auto[df_pos_auto.loc[:,s_color]]).index
+            df_color_pos = df_neg[df_neg.index.isin(ls_pos_index)]
+            if len(df_color_pos)>=1:
+                #plot negative cells
+                ax[ax_num].scatter(data=df_neg,x='DAPI_X',y='DAPI_Y',color='silver',s=1)
+                #plot positive cells
+                ax[ax_num].scatter(data=df_color_pos, x='DAPI_X',y='DAPI_Y',color='DarkBlue',s=.5)
+                      
+                ax[ax_num].axis('equal')
+                ax[ax_num].set_ylim(ax[ax_num].get_ylim()[::-1])
+                ax[ax_num].set_title(f'{s_marker} min={int(s_min)}')
+            else:
+                ax[ax_num].set_title(f'{s_marker} min={int(s_min)}')
+        ls_save = [item.split('_')[0] for item in ls_color]
+        fig.suptitle(s_scene)
+        fig.savefig(f'{qcdir}/QC_{".".join(ls_save)}_{s_scene}_auto.png')
+
+def plot_thresh_results(df_img_all,df_pos_auto,d_thresh_record,df_xy,i_max,s_thresh,qcdir):
+    ls_color = [item + '_nuclei' for item in df_img_all[(df_img_all.round_ord<=i_max) & (df_img_all.slide_scene==df_img_all.slide_scene.unique()[0]) & (df_img_all.marker.str.contains('DAPI'))].marker.tolist()]
+    positive_scatterplots(df_pos_auto,d_thresh_record,df_xy,ls_color + [f'{s_thresh}_cytoplasm'],qcdir)
+    return(ls_color)
+
+def filter_dapi_cellpose(df_pos_auto,ls_color,df_mi,ls_filter,qcdir='.'):
+    '''
+    filter by cell positive for DAPI autotresholding, in rounds specified in ls_filter
+    error
+    '''
+    #plot dapi thresholds
+    df_pos_auto['slide_scene'] = [item.split('_cell')[0] for item in df_pos_auto.index]
+    fig,ax=plt.subplots(figsize=(10,5))
+    df_plot = df_pos_auto.loc[:,ls_color+['slide_scene']]
+    df_scenes = df_plot.groupby('slide_scene').sum().T/df_plot.groupby('slide_scene').sum().max(axis=1)
+    df_scenes.plot(ax=ax,colormap='tab20')
+    ax.set_xticks(np.arange(0,(len(df_scenes.index)),1)) #+1
+    ax.set_xticklabels([item.split('_')[0] for item in df_scenes.index])
+    ax.set_ylim(0.5,1.1)
+    ax.legend(loc=3)
+    plt.tight_layout()
+    df_pos_auto['slide'] = [item.split('_')[0] for item in df_pos_auto.index]
+    ls_slides = sorted(set(df_pos_auto.slide))
+    fig.savefig(f'{qcdir}/QC_DAPIRounds_lineplot_{".".join(ls_slides)}.png')
+    #filter by first and last round dapi
+    ls_dapi_index = df_pos_auto[df_pos_auto.loc[:,ls_filter].all(axis=1)].index
+    #also filter by any dapi less than 1 in mean intensity
+    ls_dapi_missing = df_mi[(df_mi.loc[:,ls_color] < 1).sum(axis=1) > 0].index.tolist()
+    es_dapi_index = set(ls_dapi_index) - set(ls_dapi_missing)
+    print(f'number of cells before DAPI filter = {len(df_mi)}')
+    df_mi_filter = df_mi.loc[df_mi.index.isin(es_dapi_index),:]
+    [print(f'filtering by {item}') for item in ls_filter]
+    print(f'number of cells after DAPI filter = {len(df_mi_filter)}')
+    #drop cells with euler numer > 1
+    #
+    #
+    return(df_mi_filter)
+
+def load_li_thresh(ls_sample, segdir):
+    # load li thresholds
+    os.chdir(segdir)
+    df_img_all =pd.DataFrame()
+    for s_sample in ls_sample:
+        df_img = pd.read_csv(f'thresh_{s_sample}_ThresholdLi.csv', index_col=0)
+        df_img['rounds'] = [item.split('_')[0].split('Registered-')[1] for item in df_img.index]
+        df_img['color'] = [item.split('_')[-2] for item in df_img.index]
+        df_img['slide'] = [item.split('_')[2].split('-Scene-')[0] for item in df_img.index]
+        df_img['scene'] = [item.split('_')[2].split('-Scene-')[1] for item in df_img.index]
+        df_img['slide_scene'] = df_img.slide + '_scene' + df_img.scene
+        #parse file name for biomarker
+        for s_index in df_img.index:
+            #print(s_index)
+            s_color = df_img.loc[s_index,'color']
+            if s_color == 'c1':
+                s_marker = f"DAPI{df_img.loc[s_index,'rounds'].split('R')[1]}"
+            elif s_color == 'c2':
+                s_marker = s_index.split('_')[1].split('.')[0]
+            elif s_color == 'c3':
+                s_marker = s_index.split('_')[1].split('.')[1]
+            elif s_color == 'c4':
+                s_marker = s_index.split('_')[1].split('.')[2]
+            elif s_color == 'c5':
+                s_marker = s_index.split('_')[1].split('.')[3]
+            else: print('Error')
+            df_img.loc[s_index,'marker'] = s_marker
+        df_img_all = df_img_all.append(df_img)
+    return(df_img_all)
+
+def filter_standard(df_mi,d_channel,s_dapi):
+    """
+    If biomarkers have standard names according to preprocess.check_names,
+    use the hard coded locations, adds any channels needed for af subtraction
+    Input:
+    df_mi= mean intensity dataframe 
+    d_channel = dictionary of channel:background marker
+    """
+    es_standard = {'PDL1_Ring','pERK_Nuclei','CK19_Ring','pHH3_Nuclei','CK14_Ring','Ki67_Nuclei','Ki67r_Nuclei','Ecad_Ring','PCNA_Nuclei','HER2_Ring','ER_Nuclei','CD44_Ring',
+        'aSMA_Ring','AR_Nuclei','pAKT_Ring','LamAC_Nuclei','CK5_Ring','EGFR_Ring','pRb_Nuclei','FoxP3_Nuclei','CK7_Ring','PDPN_Ring','CD4_Ring','PgR_Nuclei','Vim_Ring',
+        'CD8_Ring','CD31_Ring','CD45_Ring','panCK_Ring','CD68_Ring','PD1_Ring','CD20_Ring','CK8_Ring','cPARP_Nuclei','ColIV_Ring','ColI_Ring','CK17_Ring',
+        'H3K4_Nuclei','gH2AX_Nuclei','CD3_Ring','H3K27_Nuclei','53BP1_Nuclei','BCL2_Ring','GRNZB_Nuclei','LamB1_Nuclei','pS6RP_Ring','BAX_Nuclei','RAD51_Nuclei',
+        'Glut1_Ring','CoxIV_Ring','LamB2_Nuclei','S100_Ring','BMP4_Ring','PgRc4_Nuclei','pRB_Nuclei','p63_Nuclei','p63_Ring','CGA_Ring','SYP_Ring','pS62MYC_Nuclei', 'HIF1a_Nuclei',
+        'PDGFRa_Ring', 'BMP2_Ring','PgRb_Nuclei','MUC1_Ring','CSF1R_Ring','CAV1_Ring','CCND1_Nuclei','CC3_Nuclei' } #PgRb is second PgR in dataset
+    #generate list of background markers needed for subtraction
+    lls_d_channel = []
+    for s_key,ls_item in d_channel.items():
+        lls_d_channel = lls_d_channel + [ls_item]
+    ls_background = []
+    for ls_channel in lls_d_channel:
+        ls_background = ls_background + [f'{ls_channel[0]}_Ring']
+        ls_background = ls_background + [f'{ls_channel[1]}_Nuclei']
+    #ls_background.append(f'{s_dapi}_Nuclei')
+    ls_background.append(f'{s_dapi}')
+    se_background = set(ls_background)
+    es_common = set(df_mi.columns.tolist()).intersection(es_standard) | se_background
+    df_filtered_mi = df_mi.loc[:,sorted(es_common)]
+    return(df_filtered_mi, es_standard)
+
+def filter_loc_cellpose(df_mi_filled, ls_marker_cyto, ls_custom,filter_na=True):
+    '''
+    get nuclei, perinuclei or cytoplasm, based on filter standard function
+    '''
+    __ , es_standard = filter_standard(pd.DataFrame(columns=['filter_standard']),{},'filter_standard')
+    ls_marker = sorted(set([item.split('_')[0] for item in df_mi_filled.columns[(df_mi_filled.dtypes=='float64') & (~df_mi_filled.columns.str.contains('25'))]]))
+    if ls_marker.count('mean') != 0:
+        ls_marker.remove('mean')
+    es_marker = set(ls_marker)
+    se_stand = pd.Series(index=es_standard)
+    es_dapi = set([item.split('_')[0] for item in df_mi_filled.columns[df_mi_filled.columns.str.contains('DAPI')]])
+    es_nuc = set([item.split('_')[0] for item in se_stand[se_stand.index.str.contains('_Nuclei')].index])
+    es_nuc_select = es_nuc.intersection(es_marker)
+    print('Nuclear markers:')
+    print(es_nuc_select)
+    es_ring = set([item.split('_')[0] for item in se_stand[se_stand.index.str.contains('_Ring')].index])
+    es_ring_select = es_ring.intersection(es_marker)
+    es_cyto = set(ls_marker_cyto) #set([item.split('_')[0] for item in ls_marker_cyto])
+    es_ring_only = es_ring_select - es_cyto
+    print('Ring markers:')
+    print(es_ring_only)
+    print('Cytoplasm markers:')
+    print(es_cyto)
+    es_cust = set([item.split('_')[0] for item in ls_custom])
+    es_left = es_marker - es_ring_only - es_cyto - es_nuc_select - es_dapi - es_cust
+    print('Custom markers:')
+    print(es_cust)
+    print('Markers with Nuclei or Cyto not specified: take both nuclei and ring')
+    print(es_left)
+    ls_n = [item + '_nuclei' for item in sorted(es_left | es_nuc_select | es_dapi)]
+    ls_pn = [item + '_perinuc5' for item in sorted(es_left | es_ring_only)] 
+    ls_cyto = [item + '_cytoplasm' for item in sorted(es_cyto)]
+    ls_all = ls_custom + ls_pn + ls_cyto + ls_n + ['slide_scene']
+    print(f'Missing {set(ls_all) - set(df_mi_filled.columns)}') 
+    df_filter = df_mi_filled.loc[:,ls_all]
+    print('')
+    if filter_na:
+        print(f' NAs filtered: {len(df_filter) - len(df_filter.dropna())}')
+        df_filter = df_filter.dropna()
+    print('')
+    return(df_filter,es_standard)
+
+def marker_table(df_img_all,qcdir):
+    '''
+    make a nice rounds/channels/markers table
+    '''
+    df_img_all['round_ord'] = [re.sub('Q','.5', item) for item in df_img_all.rounds]
+    df_img_all['round_ord'] = [re.sub('r','.25', item) for item in df_img_all.round_ord]
+    df_img_all['round'] = [float(re.sub('[^0-9.]','', item)) for item in df_img_all.round_ord]
+    df_marker = df_img_all[(df_img_all.slide_scene==df_img_all.slide_scene.unique()[0])].loc[:,['marker','round','color']].pivot('round','color')
+    df_marker.index.name = None
+    df_marker.to_csv(f'{qcdir}/MarkerTable.csv',header=None)
+
+def filter_cellpose_df(s_sample,segdir,qcdir,s_thresh,ls_membrane,ls_marker_cyto,ls_custom,ls_filter,ls_shrunk,man_thresh = 900):
+    '''
+    go from full dataframe and membrane dataframe to filtered dataframe and xy coordinate dataframe
+    for one sample; changes the working directory to segdir
+    Input (with example values):
+    s_sample = sample name used in the csv file names
+    segdir = folder with the feature and threshold csvs; results are saved here
+    qcdir = folder for the QC plots
+    s_thresh='Ecad'
+    ls_membrane = ['HER2']
+    ls_marker_cyto = ['CK14','CK5','CK17','CK19','CK7','CK8','Ecad','HER2','EGFR']
+    ls_custom = ['HER2_cellmem25']
+    ls_filter = ['DAPI9_nuclei','DAPI2_nuclei']
+    ls_shrunk = ['CD44_nucadj2','Vim_nucadj2']
+    man_thresh = 900
+    Output:
+    (df_mi_mem_fill, df_img_all) = the filled, unfiltered feature dataframe
+    and the threshold dataframe; the DAPI filtered features and the xy
+    coordinates are only written to csv
+    '''
+    # load_li reads the threshold csv from the working directory
+    os.chdir(segdir)
+    # thresholds (with the manual minimum for s_thresh) and full features
+    df_img_all = load_li([s_sample],s_thresh, man_thresh)
+    df_mi_full = load_cellpose_df([s_sample], segdir)
+    # coordinates are taken before any marker columns are dropped
+    df_xy = filter_cellpose_xy(df_mi_full)
+    # also adds round_ord to df_img_all, which plot_thresh_results reads below
+    df_mi_full, i_max = drop_last_rounds(df_img_all,ls_filter,df_mi_full)
+    # adds the {s_thresh}_negative column used by the next two steps
+    df_mi_filled = fill_cellpose_nas(df_mi_full,ls_marker_cyto,s_thresh=s_thresh,man_thresh=man_thresh)
+    df_mi_filled = shrink_seg_regions(df_mi_filled,s_thresh,ls_celline=[],ls_shrunk=ls_shrunk)
+    df_mi_mem_fill = fill_bright_nas(ls_membrane,s_sample,s_thresh,df_mi_filled,segdir)
+    # select one location per marker, then threshold and plot
+    df_mi,es_standard = filter_loc_cellpose(df_mi_mem_fill, ls_marker_cyto, ls_custom)
+    df_pos_auto,d_thresh_record = auto_threshold(df_mi,df_img_all)
+    ls_color = plot_thresh_results(df_img_all,df_pos_auto,d_thresh_record,df_xy,i_max,s_thresh,qcdir)
+    df_mi_filter = filter_dapi_cellpose(df_pos_auto,ls_color,df_mi,ls_filter,qcdir)
+    # save the filtered features (file name lists the filter markers) and the coordinates
+    df_mi_filter.to_csv(f'{segdir}/features_{s_sample}_FilteredMeanIntensity_{"_".join([item.split("_")[0] for item in ls_filter])}.csv')
+    df_xy.to_csv(f'{segdir}/features_{s_sample}_CentroidXY.csv')
+    return(df_mi_mem_fill,df_img_all)
+
+def filter_cellpose_background(df_mi_filled, es_standard):
+    '''
+    given a set of standard biomarker subcellular locations, obtain the opposite subcellular location 
+    and the mean intensity 
+    input: df_mi = mean intensity dataframe with all biomarker locations
+    es_standard = biomarker ring or nuclei 
+    return: dataframe with each scene and the quantiles of the negative cells scene
+    '''
+    ls_rim = [item.replace('Nuclei','cytoplasm') for item in sorted(es_standard)]
+    ls_nuc_ring = [item.replace('Ring','nuclei') for item in ls_rim]
+    ls_nuc_ring.append('slide_scene')
+    ls_nuc_ring = sorted(set(df_mi_filled.columns).intersection(set(ls_nuc_ring)))
+    #quntiles
+    df_bg =  df_mi_filled.loc[:,ls_nuc_ring].groupby('slide_scene').quantile(0) 
+    df_bg.columns = [f'{item}' for item in df_bg.columns]
+    for q in np.arange(0,1,.1):
+        df_quantile = df_mi_filled.loc[:,ls_nuc_ring].groupby('slide_scene').quantile(q)
+        df_bg = df_bg.merge(df_quantile,left_index=True, right_index=True, suffixes=('',f'_{str(int(q*10))}'))
+    #drop duplicate
+    ls_nuc_ring.remove('slide_scene')
+    df_bg = df_bg.loc[:,~df_bg.columns.isin(ls_nuc_ring)]
+    return(df_bg)
+
+def filter_cellpose_df_old(df_mi_full):
+    '''
+    old
+    '''
+    #filter
+    ls_select = [
+        #nuclei
+        'DAPI1_nuclei', 'DAPI2_nuclei', 'DAPI3_nuclei', 'DAPI4_nuclei','DAPI5_nuclei', 'DAPI5Q_nuclei',
+        'DAPI6_nuclei', 'DAPI7_nuclei','DAPI8_nuclei', 'DAPI9_nuclei',
+        'DAPI10_nuclei', 'DAPI11_nuclei','DAPI12_nuclei','DAPI12Q_nuclei',
+        'ER_nuclei','AR_nuclei','PgR_nuclei',
+        'Ki67_nuclei',  'pRB_nuclei','PCNA_nuclei', 'pHH3_nuclei',
+        'FoxP3_nuclei', 'GRNZB_nuclei',
+        'H3K27_nuclei', 'H3K4_nuclei',  
+        'LamAC_nuclei', 'LamB1_nuclei', 'LamB2_nuclei',
+        'HIF1a_nuclei', 'pERK_nuclei', 'cPARP_nuclei', 'gH2AX_nuclei',
+
+        #perinuc5     
+        'CD44_perinuc5',
+        'CD20_perinuc5', 'CD31_perinuc5',
+        'CD3_perinuc5', 'CD45_perinuc5', 'CD4_perinuc5',
+        'CD68_perinuc5', 'CD8_perinuc5','pS6RP_perinuc5',
+        'ColIV_perinuc5', 'ColI_perinuc5', 'CoxIV_perinuc5',
+        'PD1_perinuc5', 'PDPN_perinuc5','PDGFRa_perinuc5', 
+        'Vim_perinuc5', 'aSMA_perinuc5','BMP2_perinuc5',
+        #cytoplasm
+        #'pAKT_cytoplasm','Glut1_cytoplasm',
+        'CK14_cytoplasm','CK5_cytoplasm','CK17_cytoplasm',
+        'CK19_cytoplasm','CK7_cytoplasm','CK8_cytoplasm',
+        'Ecad_cytoplasm','HER2_cytoplasm','EGFR_cytoplasm',
+        #other
+        'slide_scene',
+        #'area_segmented-nuclei', #'area_segmented-cells',
+        #'eccentricity_segmented-nuclei', #'eccentricity_segmented-cells',
+        #'mean_intensity_segmented-nuclei', #'mean_intensity_segmented-cells',
+        ]
+
+    ls_negative = df_mi_full.columns[df_mi_full.columns.str.contains('_negative')].tolist()
+    #print(type(ls_negative))
+    ls_select = ls_select + ls_negative
+
+    df_mi_nas = df_mi_full.loc[:,df_mi_full.columns.isin(ls_select)]
+    print(f'Selected makers that were missing from mean intensity {set(ls_select) - set(df_mi_nas.columns)}')
+    #fiter out nas
+    print(f'Number on df_mi nas = {df_mi_nas.isna().sum().max()}')
+    df_mi = df_mi_nas.dropna(axis=0,how='any')
+    return(df_mi,df_mi_nas)
+
+###### below: functions for guillaumes features  ########
+
+def load_mi(s_sample, s_path='./', b_set_index=True):
+    """
+    input:
+        s_sample: string with sample name
+        s_path: file path to data, default is current folder
+        b_set_index: 
+
+    output:
+        df_mi: dateframe with mean intensity
+          each row is a cell, each column is a biomarker_location
+
+    description:
+      load the mean intensity dataframe
+    """
+    print(f'features_{s_sample}_MeanIntensity.tsv')
+    df_mi = pd.read_csv(
+        f'{s_path}features_{s_sample}_MeanIntensity.tsv',
+        sep='\t',
+        index_col=0
+        )
+    if b_set_index:
+        df_mi = df_mi.set_index(f'{s_sample}_' + df_mi.index.astype(str))
+    return(df_mi)
+
+def load_xy(s_sample, s_path='./', b_set_index=True):
+    """
+    input:
+        s_sample: string with sample name
+        s_path: file path to data, default is current folder
+        b_set_index: 
+
+    output:
+        df_mi: dateframe with mean intensity
+          each row is a cell, each column is a biomarker_location
+
+    description:
+      load the mean intensity dataframe
+    """
+    print(f'features_{s_sample}_CentroidY.tsv')
+    df_y = pd.read_csv(
+        f'features_{s_sample}_CentroidY.tsv',
+        sep='\t',
+        index_col=0
+        )
+    if b_set_index:
+        df_y = df_y.set_index(f'{s_sample}_' + df_y.index.astype(str))
+
+    print(f'features_{s_sample}_CentroidX.tsv')
+    df_x = pd.read_csv(
+        f'features_{s_sample}_CentroidX.tsv',
+        sep='\t',
+        index_col=0
+        )
+    if b_set_index:
+        df_x = df_x.set_index(f'{s_sample}_' + df_x.index.astype(str))
+    #merge the x and y dataframes
+    df_xy = pd.merge(df_x,df_y,left_index=True,right_index=True,suffixes=('_X', '_Y'))
+    return(df_xy)
+
+def add_scene(df,i_scene_index=1,s_group='scene'):
+    """
+    decription: add a coulmn with a grouping to dataframe that has grouping in the index
+    """
+    lst = df.index.str.split('_')
+    lst2 = [item[i_scene_index] for item in lst]
+    df[s_group] = lst2
+    return(df)
+
+def filter_dapi(df_mi,df_xy,s_dapi='DAPI11_Nuclei',dapi_thresh=1000,b_images=False,t_figsize=(8,8)):
+    """
+    description: return a dataframe where all cells have DAPI brigter than a threshold
+    right now the plotting works!
+    """
+    df_filtered_mi = df_mi.copy(deep=True)
+    #get tissue id from the dataframe
+    s_tissue = df_mi.index[0].split('_')[0]
+    #DAPI filter
+    df_filtered_mi = df_filtered_mi[df_filtered_mi.loc[:,s_dapi]>dapi_thresh]
+    print(f'Cells before DAPI filter = {len(df_mi)}')
+    print(f'Cells after DAPI filter = {len(df_filtered_mi)}')
+    df_filtered_mi.index.name='UNIQID'
+    if b_images:
+        ls_scene=list(set(df_xy.scene))
+        ls_scene.sort()
+        for s_scene in ls_scene:
+            df_pos = df_xy.loc[df_filtered_mi.index.tolist()]
+            df_pos_scene = df_pos[df_pos.scene==s_scene]
+            if len(df_pos_scene) >= 1:
+                fig,ax=plt.subplots(figsize=t_figsize)
+                ax.scatter(x=df_xy[df_xy.scene==s_scene].loc[:,'DAPI_X'], y=df_xy[df_xy.scene==s_scene].loc[:,'DAPI_Y'], color='silver',label='DAPI neg', s=2)
+                ax.scatter(x=df_pos_scene.loc[:,'DAPI_X'], y=df_pos_scene.loc[:,'DAPI_Y'], color='DarkBlue',label='DAPI pos',s=2)
+                ax.axis('equal')
+                ax.set_ylim(ax.get_ylim()[::-1])
+                ax.set_title(f'{s_scene}_DAPI')
+                plt.legend(markerscale=3)
+                fig.savefig(f'{s_tissue}_{s_scene}_{s_dapi}{dapi_thresh}.png')
+    return(df_filtered_mi)
+
+def load_meta(s_sample, s_path='./',type='csv'):
+    """
+    load rounds cycles table
+    make sure to specify location for use with downstream functions
+    make sure to add rows for any biomarkers used for analysis or processing
+    """
+    #tab or space delimited
+    if type == 'Location':
+        print(f'metadata_{s_sample}_RoundsCyclesTable_location.txt')
+        df_t = pd.read_csv(
+            f'metadata_{s_sample}_RoundsCyclesTable_location.txt',
+            delim_whitespace=True,
+            header=None,
+            index_col=False,
+            names=['marker', 'rounds','color','minimum', 'maximum', 'exposure', 'refexp','location'],
+            )
+        df_t = df_t.set_index(f'{s_sample}_' + df_t.index.astype(str))
+        df_t.replace({'Nucleus':'Nuclei'},inplace=True)
+        df_t['marker_loc'] = df_t.marker + '_' + df_t.location
+        df_t.set_index(keys='marker_loc',inplace=True)
+    elif type == 'csv':
+        print(f'metadata_{s_sample}_RoundsCyclesTable.csv')
+        df_t = pd.read_csv(
+            f'metadata_{s_sample}_RoundsCyclesTable.csv',
+            header=0,
+            index_col=0,
+            names=['rounds','color','minimum', 'maximum', 'exposure', 'refexp','location'],#'marker',
+            )
+        #df_t = df_t.set_index(f'{s_sample}_' + df_t.index.astype(str))
+        df_t.replace({'Nucleus':'Nuclei'},inplace=True)
+    #
+    elif type == 'LocationCsv':
+        print(f'metadata_{s_sample}_RoundsCyclesTable_location.csv')
+        df_t = pd.read_csv(
+            f'metadata_{s_sample}_RoundsCyclesTable_location.csv',
+            header=0,
+            index_col=False,
+            names=['marker', 'rounds','color','minimum', 'maximum', 'exposure', 'refexp','location'],
+            )
+        df_t = df_t.set_index(f'{s_sample}_' + df_t.index.astype(str))
+        df_t.replace({'Nucleus':'Nuclei'},inplace=True)
+        df_t['marker_loc'] = df_t.marker + '_' + df_t.location
+        df_t.set_index(keys='marker_loc',inplace=True)
+    else:
+        print(f'metadata_{s_sample}_RoundsCyclesTable.txt')
+        df_t = pd.read_csv(
+            f'metadata_{s_sample}_RoundsCyclesTable.txt',
+            delim_whitespace=True,
+            header=None,
+            index_col=False,
+            names=['rounds','color','minimum', 'maximum', 'exposure', 'refexp','location'],#'marker',
+            )
+        df_t = df_t.set_index(f'{s_sample}_' + df_t.index.astype(str))
+        df_t.replace({'Nucleus':'Nuclei'},inplace=True)
+    return(df_t)
+
+def add_exposure_roundscyles(df_tc, df_expc,es_standard,ls_dapi = ['DAPI12_Nuclei']):
+    """
+    df_exp = dataframe of exposure times with columns [0, 1,2,3,4]
+            and index with czi image names
+    df_t = metadata with dataframe with ['marker','exposure']
+    """
+    df_t = copy.copy(df_tc)
+    df_exp = copy.copy(df_expc)
+    df_t['location'] = ''
+    df_t.drop([item.split('_')[0] for item in ls_dapi], inplace=True)
+    df_exp.columns = ['c' + str(int(item)+1) for item in df_exp.columns]
+    df_exp['rounds'] = [item.split('_')[0] for item in df_exp.index]
+    for s_index in df_t.index:
+        s_channel = df_t.loc[s_index,'colors']
+        s_round = df_t.loc[s_index, 'rounds']
+        print(s_round)
+        #look up exposure time for marker in metadata
+        df_t_image = df_exp[(df_exp.rounds==s_round)]
+        if len(df_t_image) > 0:
+                i_exposure = df_t_image.loc[:,s_channel]
+                df_t.loc[s_index,'exposure'] = i_exposure[0]
+                df_t.loc[s_index,'refexp'] = i_exposure[0]
+        else:
+                print(f'{s_marker} has no recorded exposure time')
+        s_ring = s_index + '_Ring'
+        s_nuc = s_index + '_Nuclei'
+        ls_loc = sorted(es_standard.intersection({s_ring,s_nuc}))
+        if len(ls_loc) == 1:
+            df_t.loc[s_index,'location'] = ls_loc[0].split('_')[1]
+    return(df_t)
+
+def filter_loc(df_mi,df_t):
+    """
+    filters columns of dataframe based on locations selected in metadata_location table
+    """
+    ls_bio_loc = df_t.index.tolist()
+    df_filtered_mi = df_mi.loc[:,ls_bio_loc]
+    return(df_filtered_mi)
+
+#R0c2 R0c3 R0c4 R0c5 panCK CK14  Ki67 CK19  R1rc2 R1rc3 Ki67r R1rc5 PCNA HER2 ER Ecad aSMA AR pAKT
+#CD44 CK5 EGFR pRB LamAC pHH3 PDPN pERK FoxP3 R5Qc2 R5Qc3 R5Qc4 R5Qc5 CK7 CD68 PD1 CD45 Vim CD8 CD4 PgR CK8 cPARP ColIV CD20 CK17
+#H3K4 gH2AX ColI H3K27 pS6RP CD31 GRNZB LamB1 CoxIV HIF1a CD3 Glut1 PDGFRa LamB2 BMP2 R12Qc2 R12Qc3 R12Qc4 R12Qc5 DAPI12
+
+def filter_background(df_mi, es_standard):
+    '''
+    given a set of standard biomarker subcellular locations, obtain the opposite subcellular location
+    (names ending in Nuclei are mapped to Ring and names ending in Ring to Nuclei)
+    and summarize its intensity per scene
+    input: df_mi = mean intensity dataframe with all biomarker locations
+    es_standard = set of biomarker_Ring / biomarker_Nuclei names
+    return: dataframe indexed by scene with, for each opposite-location column found,
+    the deciles 0.0 ... 0.9 (column suffixes _0 ... _9) and the mean (suffix _mean)
+    '''
+    # three chained replacements swap the location: Nuclei -> Rim -> Ring, Ring -> Nuclei
+    ls_rim = [item.replace('Nuclei','Rim') for item in sorted(es_standard)]
+    ls_nuc_rim =  [item.replace('Ring','Nuclei') for item in ls_rim]
+    ls_nuc_ring = [item.replace('Rim','Ring') for item in ls_nuc_rim]
+    ls_nuc_ring.append('scene')
+    # NOTE(review): ls_nuc_rim is not read again after this append
+    ls_nuc_rim.append('scene')
+    # add_scene is defined elsewhere; the groupby below needs it to return a 'scene' column
+    df_scene = add_scene(df_mi)
+    # keep only the swapped columns (plus 'scene') that actually exist in the data
+    ls_nuc_ring = sorted(set(df_scene.columns).intersection(set(ls_nuc_ring)))
+    #quantiles
+    # seed frame with un-suffixed column names, so every merge below collides on
+    # those names and the right-hand (new) columns receive the suffix
+    df_bg =  df_scene.loc[:,ls_nuc_ring].groupby('scene').quantile(0) 
+    df_bg.columns = [f'{item}' for item in df_bg.columns]
+    for q in np.arange(0,1,.1):
+        df_quantile = df_scene.loc[:,ls_nuc_ring].groupby('scene').quantile(q)
+        df_bg = df_bg.merge(df_quantile,left_index=True, right_index=True, suffixes=('',f'_{str(int(q*10))}'))
+        print(q)
+        print(f'_{str(int(q*10))}')
+    #mean
+    df_quantile = df_scene.loc[:,ls_nuc_ring].groupby('scene').mean()
+    df_bg = df_bg.merge(df_quantile,left_index=True, right_index=True, suffixes=('','_mean'))
+    #drop duplicate: remove the un-suffixed seed columns, keeping only suffixed ones
+    ls_nuc_ring.remove('scene')
+    df_bg = df_bg.loc[:,~df_bg.columns.isin(ls_nuc_ring)]
+    return(df_bg)
+
+def exposure_norm(df_mi,df_t,d_factor={'c1':10,'c2':30,'c3':200,'c4':500,'c5':500}):
+    """
+    normalizes to standard exposure times
+    input: mean intensity, and metadata table with exposure time
+    """
+    df_norm = pd.DataFrame()
+    ls_columns =  [item.split('_')[0] for item in df_mi.columns.tolist()]
+    ls_column_mi = df_mi.columns.tolist()
+    for idx, s_column in enumerate(ls_columns):
+
+        s_marker = s_column.split('_')[0]
+        i_exp = df_t.loc[s_column,'exposure']
+        print(f'Processing exposure time for {s_column}: {i_exp}')
+        print(f'Processing mean intensity {ls_column_mi[idx]}')
+        i_factor = d_factor[df_t.loc[s_column,'colors']]
+        se_exp = df_mi.loc[:,ls_column_mi[idx]]
+        df_norm[ls_column_mi[idx]] = se_exp/i_exp*i_factor
+    return(df_norm)
+
+def af_subtract(df_norm,df_t,d_channel={'c2':['L488','L488'],'c3':['L555','L555'],'c4':['L647','L647'],'c5':['L750','L750']},ls_exclude=[]):
+    """
+    given an exposure normalized dataframe, metadata with biomarker location, and a dictionary of background channels, subtracts
+    correct background intensity from each cell
+    input:
+    d_channel = dictionary, key is color i.e. 'c2', value is list of ['Ring','Nuclei']
+    ls_exclude = markers to not subtract
+    output:
+    df_mi_sub,ls_sub,ls_record
+    """
+    #generate list of background markers needed for subtraction
+    lls_d_channel = []
+    for s_key,ls_item in d_channel.items():
+        lls_d_channel = lls_d_channel + [ls_item]
+    ls_background = []
+    for ls_channel in lls_d_channel:
+        ls_background = ls_background + [f'{ls_channel[0]}_Ring']
+        ls_background = ls_background + [f'{ls_channel[1]}_Nuclei']
+    se_background = set(ls_background)
+    se_exclude = set([item + '_Ring' for item in ls_exclude] + [item + '_Nuclei' for item in ls_exclude]).intersection(set(df_norm.columns.tolist()))
+    se_all = set(df_norm.columns.tolist())
+    se_sub = se_all - se_background - se_exclude
+    ls_sub = list(se_sub)
+
+    #subtract AF channels
+    df_mi_sub = pd.DataFrame()
+    
+    ls_record = []
+    for s_marker_loc in ls_sub:
+        print(s_marker_loc)
+        s_marker = s_marker_loc.split('_')[0]
+        s_loc = s_marker_loc.split('_')[1]
+        s_channel = df_t.loc[s_marker,'colors']
+        if s_channel == 'c1':
+            df_mi_sub[s_marker_loc] = df_norm.loc[:,s_marker_loc]
+            continue
+        if s_loc =='Nuclei':
+            s_AF = d_channel[s_channel][1]
+        elif s_loc == 'Ring':
+            s_AF = d_channel[s_channel][0]
+        else:
+            print('Error: location must be Ring or Nucleus')
+        s_AF_loc = s_AF + '_' + s_loc
+        df_mi_sub[s_marker_loc] = df_norm.loc[:,s_marker_loc] - df_norm.loc[:,s_AF_loc]
+        print(f'From {s_marker_loc} subtracting {s_AF_loc}')
+        ls_record = ls_record + [f'From {s_marker_loc} subtracting {s_AF_loc}\n']
+    for s_marker in sorted(se_exclude):
+        ls_record = ls_record + [f'From {s_marker} subtracting None\n']
+    df_mi_sub[sorted(se_exclude)] = df_norm.loc[:,sorted(se_exclude)]
+    #f = open(f"AFsubtractionData.txt", "w")
+    #f.writelines(ls_record)
+    #f.close()
+    #error check
+    print('AF subtraction not performed for the following markers:')
+    print(set(df_t.index) - set(ls_sub))
+    
+    return(df_mi_sub,ls_sub,ls_record)
+
+def plot_subtraction(df_norm,df_sub,ls_scene=None):
+    """
+    makes scatterplots of each marker, subtracted versus original meanintensity per cell, to judge subtraction effectiveness
+    """
+    if ls_scene == None:
+        ls_scene = list(set(df_norm.scene))
+    ls_marker = df_sub.columns.tolist()
+    ls_marker.remove('scene')
+    ls_scene.sort()
+    for s_marker in ls_marker:
+        print(f'Plotting {s_marker}')
+        fig, ax = plt.subplots(2,(len(ls_scene)+1)//2, figsize = (12,4))
+        ax = ax.ravel()
+        ax_num = -1
+        for s_scene in ls_scene:
+            df_subtracted = df_sub[df_sub.scene==s_scene]
+            df_original = df_norm[df_norm.scene==s_scene]
+            ax_num = ax_num + 1
+            ax[ax_num].scatter(x=df_original.loc[:,s_marker],y=df_subtracted.loc[:,s_marker],s=1,alpha=0.8)
+            ax[ax_num].set_title(s_scene,{'fontsize': 10,'verticalalignment': 'center'})
+            fig.text(0.5, 0.01, s_marker, ha='center') 
+            fig.text(0.6, 0.01, 'Original', ha='center') 
+            fig.text(0.01, 0.6, 'Subtracted', va='center', rotation='vertical')
+            plt.tight_layout()
+            fig.savefig(f'{s_marker}_NegativevsOriginal.png')
+
+def output_subtract(df_sub,df_t,d_factor={'c1':10,'c2':30,'c3':200,'c4':500,'c5':500}):
+    """
+    this un-normalizes by exposure time to output a new dataframe of AF subtracted cells for analysis
+    """
+    ls_sub = df_sub.columns.tolist()
+    result = any(elem == 'scene' for elem in ls_sub)
+    if result:
+        ls_sub.remove('scene')
+        df_sub = df_sub.drop(columns='scene')
+    else:
+        print('no scene column')
+    df_mi_zero = df_sub.clip(lower = 0)
+    df_mi_factor = pd.DataFrame()
+    for s_sub in ls_sub:
+        s_dft_index = s_sub.split('_')[0]
+        i_reverse_factor = df_t.loc[s_dft_index,'exposure']/d_factor[df_t.loc[s_dft_index,'colors']]
+        df_mi_factor[s_sub] = df_mi_zero.loc[:,s_sub]*i_reverse_factor
+    return df_mi_factor
+
+def af_subtract_images(df_t,d_channel={'c2':['L488','L488'],'c3':['L555','L555'],'c4':['L647','L647'],'c5':['L750','L750']},s_dapi='DAPI11_Nuclei',b_mkdir=True):
+    """
+    This code loads 16 bit grayscale tiffs, performs AF subtraction of channels/rounds defined by the user, and outputs 8 bit AF subtracted tiffs for visualization.
+    The data required is:
+    1. The RoundsCyclesTable.txt with the location (Nucleus/Ring) specified (not All), and real expsure times
+    2. 16 bit grayscale tiff images following Koei's naming convention (script processes the list of folders ls_folder) 
+    Note: name of folder can be anything
+    """
+    #generate list of markers needing subtraction
+    lls_d_channel = []
+    for s_key in d_channel:
+        lls_d_channel = lls_d_channel + [d_channel[s_key]]
+    ls_background = []
+    for ls_channel in lls_d_channel:
+        ls_background = ls_background + [f'{ls_channel[0]}_Ring']
+        ls_background = ls_background + [f'{ls_channel[1]}_Nuclei']
+    se_background = set(ls_background)
+    se_all = set(df_t.index)
+    se_sub = se_all - se_background
+    ls_sub = list(se_sub)
+    #ls_sub.remove(s_dapi) #don't need line if s_DAPI is c1
+    #subtract images
+    #os.makedirs('8bit/', exist_ok=True)
+    if b_mkdir:
+        os.mkdir('8bit')
+    ls_image = os.listdir()
+    ls_slide = []
+    ls_image_org = []
+    for s_image in ls_image:
+        if s_image.find('_ORG.tif')>-1:
+            #make a list of slides/scenes in the folder
+            s_slide = s_image.split('_')[2]
+            ls_slide = ls_slide + [s_slide]
+            #make a list of all original images in the folder
+            ls_image_org = ls_image_org + [s_image]
+    ls_slide = list(set(ls_slide))
+    #process each slide in the folder
+    for s_slide in ls_slide:
+        print(f'Processing {s_slide}')
+        df_t['image'] = 'NA'
+        ls_dapi = []
+        
+        for s_image in ls_image_org:
+            
+            #grab all original images with slide/scene name
+            if s_image.find(s_slide) > -1:
+        
+                #add matching image name to df_t (fore specific slide/scene, dapi not included)
+                s_round = s_image.split('Registered-')[1].split('_')[0]
+                s_color = s_image.split('Scene-')[1].split('_')[1]
+                s_index = df_t[(df_t.rounds==s_round) & (df_t.color==s_color)].index
+                df_t.loc[s_index,'image'] = s_image
+                if s_color == 'c1':
+                    ls_dapi = ls_dapi + [s_image]
+        #subtract images
+        ls_record = []
+        for s_marker_loc in ls_sub:
+            s_marker = s_marker_loc.split('_')[0]
+            s_loc = s_marker_loc.split('_')[1]
+            s_rounds= df_t.loc[s_marker_loc,'rounds']
+            s_channel = df_t.loc[s_marker_loc,'color']
+            if s_channel == 'c1':
+                print(f'{s_marker_loc} is DAPI')
+                continue
+            elif s_loc =='Nuclei':
+                s_AF = d_channel[s_channel][1]
+            elif s_loc == 'Ring':
+                s_AF = d_channel[s_channel][0]
+            else:
+                print('Error: location must be Ring or Nucleus')
+            s_AF_loc = s_AF + '_' + s_loc 
+            print(f'From {s_marker_loc} subtracting {s_AF_loc}')
+            s_image = df_t.loc[s_marker_loc,'image']
+            s_background = df_t.loc[s_AF_loc,'image']
+            a_img = skimage.io.imread(s_image)
+            a_AF = skimage.io.imread(s_background)
+            #divide each image by exposure time
+            #subtract 1 ms AF from 1 ms signal
+            #multiply by original image exposure time
+            a_sub = (a_img/df_t.loc[s_marker_loc,'exposure'] - a_AF/df_t.loc[s_AF_loc,'exposure'])*df_t.loc[s_marker_loc,'exposure']
+
+            ls_record = ls_record + [f'From {s_marker_loc} subtracting {s_AF_loc}\n']
+            #make all negative numbers into zero
+            a_zero = a_sub.clip(min=0,max=a_sub.max())
+            a_zero_8bit = (a_zero/256).astype(np.uint8)
+            s_fname = f"8bit/{s_rounds}_{s_marker}_{s_slide}_{s_channel}_8bit.tif"
+            skimage.io.imsave(s_fname,a_zero_8bit)
+        f = open(f"8bit/AFsubtractionImages.txt", "w")
+        f.writelines(ls_record)
+        f.close()
+        #save 8 bit dapis
+        for s_dapi in ls_dapi:
+            a_img = skimage.io.imread(s_dapi)
+            a_zero_8bit = (a_img/256).astype(np.uint8)
+            s_marker = 'DAPI'
+            s_channel = 'c1'
+            s_round =  s_dapi.split('Registered-')[1].split('_')[0]
+            s_fname = f"8bit/{s_round}_{s_marker}_{s_slide}_{s_channel}_8bit.tif"
+            skimage.io.imsave(s_fname,a_zero_8bit)
+
+def round_overlays():
+    """
+    output multipage tiffs with five channels per round
+    """
+    os.chdir('./8bit')
+    ls_image = os.listdir()
+    ls_slide = []
+    ls_image_org = []
+    ls_round = []
+
+    for s_image in ls_image:
+        if s_image.find('8bit.tif') > -1:
+            #make a list of slides/scenes
+            #also make list of rounds
+            s_slide = s_image.split('_')[2]
+            ls_slide = ls_slide + [s_slide]
+            ls_image_org = ls_image_org + [s_image]
+            s_round = s_image.split('_')[0]
+            ls_round = ls_round + [s_round]
+        ls_slide = list(set(ls_slide))
+        ls_round = list(set(ls_round))
+    for s_slide in ls_slide:
+        print(f'Processing {s_slide}')
+        for s_round in ls_round:
+            d_overlay = {}
+            ls_color_round = []
+            for s_image in ls_image_org:
+                if s_image.find(s_slide) > -1:
+                    if s_image.find(f'{s_round}_') == 0:
+                        s_color = s_image.split('_')[3]
+                        d_overlay.update({s_color:s_image})
+                        s_image_round = s_image
+            a_size = skimage.io.imread(s_image_round)
+            a_overlay = np.zeros((len(d_overlay),a_size.shape[0],a_size.shape[1]),dtype=np.uint8)
+            s_biomarker_all = ''
+            i = -1 
+            for s_color in sorted(d_overlay.keys()):
+                i = i + 1
+                s_overlay= d_overlay[s_color]
+                s_biomarker = s_overlay.split('_')[1] + '.'
+                s_biomarker_all = s_biomarker_all + s_biomarker
+                a_channel = skimage.io.imread(s_overlay)
+                a_overlay[i,:,:] = a_channel
+            s_biomarker_all = s_biomarker_all[:-1]
+            #this works. Open in image j. use Image/Color/Make Composite. Then use 
+            #Image/Color/Channels Tool to turn on and off channels
+            #use Image/Adjust/Brightness/Contrast to adjust 
+            with skimage.external.tifffile.TiffWriter(f'{s_round}_{s_biomarker_all}_{s_slide}_overlay.tiff', imagej=True) as tif:
+                for i in range(a_overlay.shape[0]):
+                    tif.save(a_overlay[i])
+    os.chdir('..')
+
+def custom_overlays(d_combos, df_img, df_dapi):
+    """
+    output custon multi page tiffs according to dictionary, with s_dapi as channel 1 in each overlay
+    BUG with 53BP1
+    d_combos = {'Immune':{'CD45', 'PD1', 'CD8', 'CD4', 'CD68', 'FoxP3','GRNZB','CD20','CD3'},
+    'Stromal':{'Vim', 'aSMA', 'PDPN', 'CD31', 'ColIV','ColI'},
+    'Differentiation':{'CK19', 'CK7','CK5', 'CK14', 'CK17','CK8'},
+    'Tumor':{'HER2', 'Ecad', 'ER', 'PgR','Ki67','PCNA'},
+    'Proliferation':{'EGFR','CD44','AR','pHH3','pRB'}, 
+    'Functional':{'pS6RP','H3K27','H3K4','cPARP','gH2AX','pAKT','pERK'},
+    'Lamins':{'LamB1','LamAC', 'LamB2'}}
+    """
+    #os.chdir('./AFSubtracted')
+
+    ls_slide = list(set(df_img.scene))
+    #now make overlays
+    for s_slide in ls_slide:
+        print(f'Processing {s_slide}')
+        df_slide = df_img[df_img.scene==s_slide]
+        s_image_round = (df_dapi[df_dapi.scene == s_slide]).index[0]
+        if len((df_dapi[df_dapi.scene == s_slide]).index) == 0:
+            print('Error: dapi not found')
+        elif len((df_dapi[df_dapi.scene == s_slide]).index) > 1:
+            print('Error: too many dapi images found')
+        else:
+            print(s_image_round)
+        #exclude any missing biomarkers
+        es_all = set(df_slide.marker)
+        if len(list(set(df_img.imagetype)))==1:
+            s_imagetype = list(set(df_img.imagetype))[0]
+            print(s_imagetype)
+        else:
+            print('Error: more than one image type)')
+        for s_type in d_combos:
+            d_overlay = {}
+            es_combos = d_combos[s_type]
+            es_combos_shared = es_combos.intersection(es_all)
+            for idx, s_combo in enumerate(sorted(es_combos_shared)):
+                s_filename = (df_slide[df_slide.marker==s_combo]).index[0]
+                if len((df_slide[df_slide.marker==s_combo]).index) == 0:
+                    print('Error: marker not found')
+                elif len((df_slide[df_slide.marker==s_combo]).index) > 1:
+                    print('Error: too many marker images found')
+                else:
+                    print(s_filename)
+                d_overlay.update({s_combo:s_filename})
+            d_overlay.update({'1AAADAPI':s_image_round})
+            a_size = skimage.io.imread(s_image_round)
+            a_overlay = np.zeros((len(d_overlay),a_size.shape[0],a_size.shape[1]),dtype=np.uint8)
+            s_biomarker_all = ''
+            i = -1 
+            for s_color in sorted(d_overlay.keys()):
+                i = i + 1
+                s_overlay= d_overlay[s_color]
+                s_biomarker = s_color.split('1AAA')[0] + '.'
+                s_biomarker_all = s_biomarker_all + s_biomarker
+                a_channel = skimage.io.imread(s_overlay)
+                if s_imagetype=='ORG':
+                    a_channel = (a_channel/256).astype(np.uint8)
+                    print('covert to 8 bit')
+                a_overlay[i,:,:] = a_channel
+            s_biomarker_all = s_biomarker_all[1:-1]
+            #this works. Open in image j. use Image/Color/Make Composite. Then use 
+            #Image/Color/Channels Tool to turn on and off channels
+            #use Image/Adjust/Brightness/Contrast to adjust 
+            with skimage.external.tifffile.TiffWriter(f'./{s_type}_{((df_dapi[df_dapi.scene==s_slide]).marker[0])}.{s_biomarker_all}_{s_slide}_overlay.tiff', imagej=True) as tif:
+                for i in range(a_overlay.shape[0]):
+                    tif.save(a_overlay[i])
+            print(f'saved {s_type}')
+
+def custom_crop_overlays(d_combos,d_crop, df_img,s_dapi, tu_dim=(1000,1000)): #df_dapi,
+    """
+    output custon multi page tiffs according to dictionary, with s_dapi as channel 1 in each overlay
+    BUG with 53BP1
+    d_crop : {slide_scene : (x,y) coord
+    tu_dim = (width, height)
+    d_combos = {'Immune':{'CD45', 'PD1', 'CD8', 'CD4', 'CD68', 'FoxP3','GRNZB','CD20','CD3'},
+    'Stromal':{'Vim', 'aSMA', 'PDPN', 'CD31', 'ColIV','ColI'},
+    'Differentiation':{'CK19', 'CK7','CK5', 'CK14', 'CK17','CK8'},
+    'Tumor':{'HER2', 'Ecad', 'ER', 'PgR','Ki67','PCNA'},
+    'Proliferation':{'EGFR','CD44','AR','pHH3','pRB'}, 
+    'Functional':{'pS6RP','H3K27','H3K4','cPARP','gH2AX','pAKT','pERK'},
+    'Lamins':{'LamB1','LamAC', 'LamB2'}}
+    """
+    #os.chdir('./AFSubtracted')
+
+    ls_slide = list(set(df_img.scene))
+    #now make overlays
+    for s_slide, xy_cropcoor in d_crop.items():
+        print(f'Processing {s_slide}')
+        df_slide = df_img[df_img.scene==s_slide]
+        s_image_round = df_slide[df_slide.marker==s_dapi.split('_')[0]].index[0]
+        if len(df_slide[df_slide.marker==s_dapi.split('_')[0]].index) == 0:
+            print('Error: dapi not found')
+        elif len(df_slide[df_slide.marker==s_dapi.split('_')[0]].index) > 1:
+            print('Error: too many dapi images found')
+        else:
+            print(s_image_round)
+        #exclude any missing biomarkers
+        es_all = set(df_slide.marker)
+        if len(list(set(df_img.imagetype)))==1:
+            s_imagetype = list(set(df_img.imagetype))[0]
+            print(s_imagetype)
+        else:
+            print('Error: more than one image type)')
+        for s_type, es_combos in d_combos.items():
+            d_overlay = {}
+            es_combos_shared = es_combos.intersection(es_all)
+            for idx, s_combo in enumerate(sorted(es_combos_shared)):
+                s_filename = (df_slide[df_slide.marker==s_combo]).index[0]
+                if len((df_slide[df_slide.marker==s_combo]).index) == 0:
+                    print('Error: marker not found')
+                elif len((df_slide[df_slide.marker==s_combo]).index) > 1:
+                    print('Error: too many marker images found')
+                else:
+                    print(s_filename)
+                d_overlay.update({s_combo:s_filename})
+            d_overlay.update({'1AAADAPI':s_image_round})
+            a_size = skimage.io.imread(s_image_round)
+            #crop 
+            a_crop = a_size[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+            a_overlay = np.zeros((len(d_overlay),a_crop.shape[0],a_crop.shape[1]),dtype=np.uint8)
+            s_biomarker_all = ''
+            i = -1 
+            for s_color in sorted(d_overlay.keys()):
+                i = i + 1
+                s_overlay= d_overlay[s_color]
+                s_biomarker = s_color.split('1AAA')[0] + '.'
+                s_biomarker_all = s_biomarker_all + s_biomarker
+                a_size = skimage.io.imread(s_overlay)
+                #crop 
+                a_channel = a_size[(xy_cropcoor[1]):(xy_cropcoor[1]+tu_dim[1]),(xy_cropcoor[0]):(xy_cropcoor[0]+tu_dim[0])]
+                if s_imagetype=='ORG':
+                    a_channel = (a_channel/256).astype(np.uint8)
+                    print('covert to 8 bit')
+                a_overlay[i,:,:] = a_channel
+            s_biomarker_all = s_biomarker_all[1:-1]
+            #this works. Open in image j. use Image/Color/Make Composite. Then use 
+            #Image/Color/Channels Tool to turn on and off channels
+            #use Image/Adjust/Brightness/Contrast to adjust 
+            with skimage.external.tifffile.TiffWriter(f'./{s_type}_{s_dapi.split("_")[0]}.{s_biomarker_all}_{s_slide}_x{xy_cropcoor[0]}y{xy_cropcoor[1]}_overlay.tiff', imagej=True) as tif:
+                for i in range(a_overlay.shape[0]):
+                    tif.save(a_overlay[i])
+            print(f'saved {s_type}')
+
+def make_thresh_df(df_out,ls_drop=None):
+    """
+    makes a thresholding csv matching the output dataframe (df_out)'s scenes and biomarkers
+    """
+    ls_scene = list(set(df_out.scene))
+    ls_scene.append('global_manual')
+    ls_scene.sort()
+    ls_biomarker = df_out.columns.tolist()
+    ls_biomarker.remove('scene')
+    if ls_drop != None:
+        for s_drop in ls_drop:
+            ls_biomarker.remove(s_drop)
+    ls_manual = []
+    for s_biomarker in ls_biomarker:
+        s_marker = s_biomarker.split('_')[0] + '_manual'
+        ls_manual.append(s_marker)
+    ls_manual.sort()
+    df_thresh = pd.DataFrame(index=ls_scene,columns=ls_manual)
+    #df_thresh_t = df_thresh.transpose()
+    return(df_thresh)
+
+def check_seg(s_sample= 'sampleID',ls_find=['Cell Segmentation Full Color'], i_rows=2, t_figsize=(20,10)):
+    """
+    This script makes overviews of all the specified segmentation images of guillaumes ouput images
+    in a big folder (slides prepared for segmentation for example)
+    Input: ls_find = list of images to view
+     i_rows = number or rows in figure
+     t_figsize = (x, y) in inches size of figure
+     b_mkdir = boolean whether to make a new Check_Registration folder (deprecated)
+    Output: dictionary with {slide_color:number of rounds found}
+     images of all rounds of a certain slide_color
+    """
+    d_result = {}
+    #if b_mkdir:
+    #        os.mkdir(f'./Check_Registration')
+    for s_find in ls_find:
+        #find all dapi slides
+        ls_dapis = []
+        for s_dir in os.listdir():
+            if s_dir.find(s_find) > -1:
+                ls_dapis = ls_dapis + [s_dir]
+        ls_dapis.sort()
+        
+        #find all unique scenes
+        ls_scene_long = []
+        for s_dapi in ls_dapis:
+            ls_scene_long = ls_scene_long + [(s_dapi.split('-')[0])]
+        ls_scene = list(set(ls_scene_long))
+        ls_scene.sort()
+        fig,ax = plt.subplots(i_rows,(len(ls_scene)+(i_rows-1))//i_rows, figsize = t_figsize, squeeze=False)
+        ax = ax.ravel()
+        for idx, s_scene in enumerate(ls_scene):
+            print(f'Processing {s_scene}')
+            im_low = skimage.io.imread(ls_dapis[idx])#,plugin='simpleitk'
+            im = skimage.exposure.rescale_intensity(im_low,in_range=(np.quantile(im_low,0.02),np.quantile(im_low,0.98)+np.quantile(im_low,0.98)/2))
+            im = skimage.transform.rescale(im, 0.25, anti_aliasing=False)
+            ax[idx].imshow(im) #, cmap='gray'
+            ax[idx].set_title(s_scene,{'fontsize':12})
+        plt.tight_layout()
+        #fig.savefig(f'../Check_Registration/{s_sample}_{s_find}.png')
+        d_result.update({f'{s_sample}_{s_find}.png':fig})
+    return(d_result)
diff --git a/mplex_image/register.py b/mplex_image/register.py
new file mode 100755
index 0000000..b963866
--- /dev/null
+++ b/mplex_image/register.py
@@ -0,0 +1,105 @@
+import numpy as np
+from PIL import Image
+from matplotlib import pyplot as plt
+from skimage import transform, util
+from skimage import data, img_as_float
+from skimage.util import img_as_ubyte
+import cv2
+import sys
+
+# code adapted from Chandler Gatenbee and Brian White
+# https://github.com/IAWG-CSBC-PSON/registration-challenge
+
+def match_keypoints(moving, target, feature_detector):
+    '''
+    :param moving: image that is to be warped to align with target image
+    :param target: image to which the moving image will be aligned
+    :param feature_detector: a feature detector from opencv
+    :return:
+    '''
+
+    kp1, desc1 = feature_detector.detectAndCompute(moving, None)
+    kp2, desc2 = feature_detector.detectAndCompute(target, None)
+
+    matcher = cv2.BFMatcher(normType=cv2.NORM_L2, crossCheck=True)
+    matches = matcher.match(desc1, desc2)
+
+    src_match_idx = [m.queryIdx for m in matches]
+    dst_match_idx = [m.trainIdx for m in matches]
+
+    src_points = np.float32([kp1[i].pt for i in src_match_idx])
+    dst_points = np.float32([kp2[i].pt for i in dst_match_idx])
+
+    H, mask = cv2.findHomography(src_points, dst_points, cv2.RANSAC, ransacReprojThreshold=10)
+
+    good = [matches[i] for i in np.arange(0, len(mask)) if mask[i] == [1]]
+
+    filtered_src_match_idx = [m.queryIdx for m in good]
+    filtered_dst_match_idx = [m.trainIdx for m in good]
+
+    filtered_src_points = np.float32([kp1[i].pt for i in filtered_src_match_idx])
+    filtered_dst_points = np.float32([kp2[i].pt for i in filtered_dst_match_idx])
+
+    return filtered_src_points, filtered_dst_points
+
+def apply_transform(moving, target, moving_pts, target_pts, transformer, output_shape_rc=None):
+    '''
+    :param transformer: transformer object from skimage. See https://scikit-image.org/docs/dev/api/skimage.transform.html for different transformations
+    :param output_shape_rc: shape of warped image (row, col). If None, uses shape of traget image
+    return
+    '''
+    if output_shape_rc is None:
+        output_shape_rc = target.shape[:2]
+
+    if str(transformer.__class__) == "<class 'skimage.transform._geometric.PolynomialTransform'>":
+        transformer.estimate(target_pts, moving_pts)
+        warped_img = transform.warp(moving, transformer, output_shape=output_shape_rc)
+
+        ### Restimate to warp points
+        transformer.estimate(moving_pts, target_pts)
+        warped_pts = transformer(moving_pts)
+    else:
+        transformer.estimate(moving_pts, target_pts)
+        warped_img = transform.warp(moving, transformer.inverse, output_shape=output_shape_rc)
+        warped_pts = transformer(moving_pts)
+
+    return warped_img, warped_pts
+
+def keypoint_distance(moving_pts, target_pts, img_h, img_w):
+    dst = np.sqrt(np.sum((moving_pts - target_pts)**2, axis=1)) / np.sqrt(img_h**2 + img_w**2)
+    return np.mean(dst)
+
+
+
+
+def register(target_file,moving_file, b_plot=False):
+    s_round = moving_file.split('_')[0]
+    s_sample = moving_file.split('_')[2]
+    print(s_round)
+    target = img_as_ubyte(img_as_float(Image.open(target_file)))
+    moving = img_as_ubyte(img_as_float(Image.open(moving_file)))
+    
+    fd = cv2.AKAZE_create()
+    #fd = cv2.KAZE_create(extended=True)
+    moving_pts, target_pts = match_keypoints(moving, target, feature_detector=fd)
+
+    transformer = transform.SimilarityTransform()
+    warped_img, warped_pts = apply_transform(moving, target, moving_pts, target_pts, transformer=transformer)
+
+    warped_img = img_as_ubyte(warped_img)
+    
+    print("Unaligned offset:", keypoint_distance(moving_pts, target_pts, moving.shape[0], moving.shape[1]))
+    print("Aligned offset:", keypoint_distance(warped_pts, target_pts, moving.shape[0], moving.shape[1]))
+    if b_plot:
+        fig, ax = plt.subplots(2,2, figsize=(10,10))
+        ax[0][0].imshow(target)
+        ax[0][0].imshow(moving, alpha=0.5)
+        ax[1][0].scatter(target_pts[:,0], -target_pts[:,1])
+        ax[1][0].scatter(moving_pts[:,0], -moving_pts[:,1])
+
+        ax[0][1].imshow(target)
+        ax[0][1].imshow(warped_img, alpha=0.5)
+        ax[1][1].scatter(target_pts[:,0], -target_pts[:,1])
+        ax[1][1].scatter(warped_pts[:,0], -warped_pts[:,1])
+        plt.savefig(f"../../QC/RegistrationPlots/{s_sample}_{s_round}_rigid_align.png", format="PNG")
+    return(moving_pts, target_pts, transformer) 
diff --git a/mplex_image/segment.py b/mplex_image/segment.py
new file mode 100755
index 0000000..972742a
--- /dev/null
+++ b/mplex_image/segment.py
@@ -0,0 +1,717 @@
+####
+# title: segment.py
+#
+# language: Python3.7
+# date: 2020-06-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 script for cell segmentation
+####
+import time
+import cellpose
+from cellpose import models
+from PIL import Image
+Image.MAX_IMAGE_PIXELS = 1000000000
+
+import os
+import skimage
+import pandas as pd
+import numpy as np
+import sys 
+import scipy
+from scipy import stats
+from scipy import ndimage as ndi
+from skimage import io, filters
+from skimage import measure, segmentation, morphology
+from numba import jit, types
+from numba.extending import overload
+from numba.experimental import jitclass
+import numba
+import mxnet as mx 
+import stat
+from mxnet import nd
+from mplex_image import preprocess
+
+#set src path (CHANGE ME)
+s_src_path = '/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF'
+
+#functions
+
+def gpu_device():
+    try:
+        _ = mx.nd.array([1, 2, 3], ctx=mx.gpu())
+        mx_gpu = mx.gpu()
+    except mx.MXNetError:
+        return None
+    return mx_gpu
+
+def cellpose_nuc(key,dapi,diameter=30):
+    '''
+    smallest nuclei are about 9 pixels, lymphocyte is 15 pixels, tumor is 25 pixels
+    using 20 can capture large tumor cells, without sacrificing smaller cells,
+    '''
+    try:
+        nd_array = mx.nd.array([1, 2, 3], ctx=mx.gpu())
+        print(nd_array)
+        mx_gpu = mx.gpu()
+    except mx.MXNetError:
+        print('Mxnet error')
+        mx_gpu = None
+    model = models.Cellpose(model_type='nuclei',device=mx_gpu)
+    newkey = f"{key.split(' - Z')[0]} nuclei{diameter}"
+    print(f"modelling {newkey}")
+    channels = [0,0] 
+    print(f'Minimum nuclei size = {int(np.pi*(diameter/10)**2)}')
+    masks, flows, styles, diams = model.eval(dapi, diameter=diameter, channels=channels,flow_threshold=0,min_size= int(np.pi*(diameter/10)**2))
+    return({newkey:masks})
+
+def cellpose_cell(key,zdh,diameter=25):
+    '''
+    big tumor cell is 30 pixels, lymphocyte about 18 pixels, small fibroblast 12 pixels
+    '''
+    try:
+        _ = mx.nd.array([1, 2, 3], ctx=mx.gpu())
+        mx_gpu = mx.gpu()
+    except mx.MXNetError:
+        mx_gpu = None
+    model = models.Cellpose(model_type='cyto',device=mx_gpu)
+    newkey = f"{key.split(' - Z')[0]} cell{diameter}"
+    print(f"modelling {newkey}")
+    channels = [2,3]
+    print(f'Minimum cell size = {int(np.pi*(diameter/5)**2)}')
+    masks, flows, styles, diams = model.eval(zdh, diameter=diameter, channels=channels,flow_threshold=0.6,cellprob_threshold=0.0, min_size= int(np.pi*(diameter/5)**2))
+    return({newkey:masks})
+
+def parse_org(s_end = "ORG.tif",s_start='R'):
+    """
+    This function will parse images following koei's naming convention
+    Example: Registered-R1_PCNA.CD8.PD1.CK19_Her2B-K157-Scene-002_c1_ORG.tif
+    The output is a dataframe with image filename in index
+    And rounds, color, imagetype, scene (/tissue), and marker in the columns
+    """
+    s_path = os.getcwd()
+    ls_file = []
+    for file in os.listdir():
+        if file.endswith(s_end):
+            if file.find(s_start)==0:
+                ls_file = ls_file + [file]
+    df_img = pd.DataFrame(index=ls_file)
+    df_img['rounds'] = [item.split('_')[0].split('Registered-')[1] for item in df_img.index]
+    df_img['color'] = [item.split('_')[-2] for item in df_img.index]
+    df_img['slide'] = [item.split('_')[2] for item in df_img.index]
+    df_img['marker_string'] = [item.split('_')[1] for item in df_img.index]
+    try:
+        df_img['scene'] = [item.split('-Scene-')[1] for item in df_img.slide]
+    except:
+        df_img['scene'] = '001'
+    df_img['path'] = [f"{s_path}/{item}" for item in df_img.index]
+    #parse file name for biomarker
+    for s_index in df_img.index:
+        #print(s_index)
+        s_color = df_img.loc[s_index,'color']
+        if s_color == 'c1':
+            s_marker = 'DAPI'
+        elif s_color == 'c2':
+            s_marker = s_index.split('_')[1].split('.')[0]
+        elif s_color == 'c3':
+            s_marker = s_index.split('_')[1].split('.')[1]
+        elif s_color == 'c4':
+            s_marker = s_index.split('_')[1].split('.')[2]
+        elif s_color == 'c5':
+            s_marker = s_index.split('_')[1].split('.')[3]
+        #these are only included in sardana shading corrected images
+        elif s_color == 'c6':
+            s_marker = s_index.split('_')[1].split('.')[2]
+        elif s_color == 'c7':
+            s_marker = s_index.split('_')[1].split('.')[3]
+        else: print('Error')
+        df_img.loc[s_index,'marker'] = s_marker
+    return(df_img) 
+
+def cmif_mkdir(ls_dir):
+    '''
+    check if directories existe. if not, make them
+    '''
+    for s_dir in ls_dir:
+        if not os.path.exists(s_dir):
+            os.makedirs(s_dir)
+
+def load_single(s_find, s_scene):
+    '''
+    load a single image containing the find strin, scale, return {filename:scaled image}
+    '''
+    d_img = {}
+    for s_file in os.listdir():
+        if s_file.find(s_find)>-1:
+            a_img = io.imread(s_file)
+            a_scale = skimage.exposure.rescale_intensity(a_img,in_range=(np.quantile(a_img,0.03),1.5*np.quantile(a_img,0.9999)))
+            #d_img.update({f"{os.path.splitext(s_file)[0]}":a_scale})
+            d_img.update({f"{s_scene}":a_scale})
+    print(f'Number of images = {len(d_img)}')
+    return(d_img)
+
+def load_stack(df_img,s_find,s_scene,ls_markers,ls_rare):
+    '''
+    load an image stack in df_img, (df_img must have "path")
+    scale, get mip, return {filename:mip}
+    '''
+    d_img = {}
+    for s_file in os.listdir():
+        if s_file.find(s_find)>-1:
+            a_img = io.imread(s_file)
+            dapi = skimage.exposure.rescale_intensity(a_img,in_range=(np.quantile(a_img,0.03),1.5*np.quantile(a_img,0.9999)))
+ 
+    imgs = []
+    #images
+    df_common = df_img[df_img.marker.isin(ls_markers) & ~df_img.marker.isin(ls_rare)]
+    df_rare =  df_img[df_img.marker.isin(ls_markers) & df_img.marker.isin(ls_rare)]
+    for s_path in df_common.path:
+        #print(s_path)
+        img = io.imread(s_path)
+        img_scale = skimage.exposure.rescale_intensity(img,in_range=(np.quantile(img,0.03),1.5*np.quantile(img,0.9999)))
+        imgs.append(img_scale)
+    for s_path in df_rare.path:
+        img = io.imread(s_path)
+        img_scale = skimage.exposure.rescale_intensity(img,in_range=(np.quantile(img,0.03),1.5*np.quantile(img,0.99999)))
+        imgs.append(img_scale)
+    mip = np.stack(imgs).max(axis=0)
+    zdh = np.dstack((np.zeros(mip.shape),mip,dapi)).astype('uint16')
+    #name
+    #s_index = df_common.index[0]
+    #s_common_marker = df_common.loc[s_index,'marker_string']
+    #s_name = os.path.splitext(df_common.index[0])[0]
+    #s_name = s_name.replace(s_common_marker,".".join(ls_markers))
+    # name
+    s_name = f'{s_scene}_{".".join(ls_markers)}'
+    d_img.update({s_name:zdh})
+    print(f'Number of projection images = ({len(d_img)}')
+    return(d_img)
+
+def load_img(subdir,s_find,s_sample,s_scene,ls_seg_markers,ls_rare):
+    '''
+    load dapi round and cell segmentation images
+    '''
+   #image dataframe
+    os.chdir(subdir)
+    df_seg = pd.DataFrame()
+    for s_dir in os.listdir():
+        if s_dir.find(s_sample)>-1:
+            os.chdir(s_dir)
+            df_img = parse_org()
+            df_markers = df_img[df_img.marker.isin(ls_seg_markers)]
+            df_markers['path'] = [f'{subdir}/{s_dir}/{item}' for item in df_markers.index]
+            if df_img.index.str.contains(s_find).sum()==1:
+                s_file = s_dir
+                dapi = io.imread(df_img[df_img.index.str.contains(s_find)].index[0])
+            os.chdir('..')
+            df_seg = df_seg.append(df_markers)
+
+    #load z_projection DAPIs
+    os.chdir(subdir)
+    d_dapi = {}
+    d_cyto = {}
+
+    dapi_scale = skimage.exposure.rescale_intensity(dapi,in_range=(np.quantile(dapi,0.03),1.5*np.quantile(dapi,0.9999)))
+    d_dapi.update({f"{s_sample}-{s_scene}":dapi_scale})
+    imgs = []
+    #images
+    df_common = df_seg[(df_seg.scene==s_scene) & (~df_seg.marker.isin(ls_rare))]
+    df_rare =  df_seg[(df_seg.scene==s_scene) & (df_seg.marker.isin(ls_rare))]
+    for s_path in df_common.path:
+                print(s_path)
+                img = io.imread(s_path)
+                img_scale = skimage.exposure.rescale_intensity(img,in_range=(np.quantile(img,0.03),1.5*np.quantile(img,0.9999)))
+                imgs.append(img_scale)
+    for s_path in df_rare.path:
+                img = io.imread(s_path)
+                img_scale = skimage.exposure.rescale_intensity(img,in_range=(np.quantile(img,0.03),1.5*np.quantile(img,0.99999)))
+                imgs.append(img_scale)
+    mip = np.stack(imgs).max(axis=0)
+    zdh = np.dstack((np.zeros(mip.shape),mip,dapi)).astype('uint16')
+    d_cyto.update({f"{s_sample}-{s_scene}":zdh})
+    print(f'Number of images = {len(d_dapi)} dapi projections ({len(d_cyto)} cytoplasm projections) ')
+
+    return(d_dapi,d_cyto)
+
+def cellpose_segment_job(s_sample='SampleName',s_slide_scene="SceneName",s_find="FindDAPIString",segdir='PathtoSegmentation',imgdir='PathtoImages',nuc_diam='30',cell_diam='30',s_type='cell_or_nuclei',s_seg_markers="['Ecad']",s_rare="[]",s_match='both',s_data='cmIF',s_job='cpu'):
+    """
+    makes specific changes to template pyscripts files in Jenny's directories to result in .py file
+    The templates are read from {s_src_path}/src; the edited python script and
+    shell script are written to segdir as cellpose_{s_type}_{s_slide_scene}.py/.sh
+    and the shell script is made executable.
+    Input:
+    s_sample, s_slide_scene, s_find, segdir, imgdir: replace the placeholder strings
+        'SampleName', 'SceneName', 'FindDAPIString', 'PathtoSegmentation', 'PathtoImages'
+    nuc_diam, cell_diam: replace 'nuc_diam=int' and 'cell_diam=int'
+    s_type: replaces 'cell_or_nuclei'; also part of the output file names
+    s_seg_markers: text replacing "['Ecad']" in the template
+    s_rare: text replacing the right hand side of "ls_rare = []"
+    s_match: 'match' replaces '#MATCHONLY' with ''', 'seg' replaces '#SEGONLY' with '''
+    s_data: 'cmIF' or 'codex', selects the python template
+    s_job: 'long' or 'gpu' select the matching shell template, anything else the default one
+    Note: the default values are the placeholder strings themselves, so a default leaves
+    that placeholder unchanged. Changes the working directory to segdir.
+    """
+    #find template, open ,edit
+    os.chdir(f'{s_src_path}/src')
+    # NOTE(review): there is no else branch, so s_file is unbound for any other s_data value
+    if s_data == 'cmIF':
+        with open('cellpose_template.py') as f:
+            s_file = f.read()
+    elif s_data == 'codex':
+        with open('cellpose_template_codex.py') as f:
+            s_file = f.read()
+    # placeholder substitutions are applied one after another, in this order
+    s_file = s_file.replace('SampleName',s_sample)
+    s_file = s_file.replace('SceneName',s_slide_scene)
+    s_file = s_file.replace('FindDAPIString',s_find)
+    s_file = s_file.replace('nuc_diam=int',f'nuc_diam={str(nuc_diam)}')
+    s_file = s_file.replace('cell_diam=int',f'cell_diam={str(cell_diam)}')
+    s_file = s_file.replace('cell_or_nuclei',s_type)
+    s_file = s_file.replace("['Ecad']",s_seg_markers)
+    s_file = s_file.replace("ls_rare = []",f"ls_rare = {s_rare}")
+    s_file = s_file.replace('PathtoSegmentation',segdir)
+    s_file = s_file.replace('PathtoImages',imgdir)
+    # the marker comments become triple quotes; presumably this comments out a template section - verify against the template
+    if s_match == 'match':
+        s_file = s_file.replace('#MATCHONLY',"'''")
+    elif s_match == 'seg':
+        s_file = s_file.replace('#SEGONLY',"'''")
+    # pick the shell template for the requested job type
+    if s_job == 'long':
+        with open('cellpose_template_long.sh') as f:
+            s_shell = f.read()
+    elif s_job == 'gpu':
+        with open('cellpose_template_gpu.sh') as f:
+            s_shell = f.read()
+        # gpu jobs also strip the '#gpu#' markers and replace '#SEGONLY'
+        s_file = s_file.replace('#gpu#','')
+        s_file = s_file.replace('#SEGONLY',"'''")
+    else:
+        with open('cellpose_template.sh') as f:
+            s_shell = f.read()
+    s_shell = s_shell.replace("PythonScripName",f'cellpose_{s_type}_{s_slide_scene}.py')
+
+    #save edited .py file
+    # s_sample is only printed after this point; the substitutions above used the full name
+    if s_sample.find("-Scene") > -1:
+        s_sample = s_sample.split("-Scene")[0]
+        print(s_sample)
+    os.chdir(f'{segdir}')
+    with open(f'cellpose_{s_type}_{s_slide_scene}.py', 'w') as f:
+        f.write(s_file)
+
+    with open(f'cellpose_{s_type}_{s_slide_scene}.sh', 'w') as f:
+        f.write(s_shell)
+    # add the owner-execute bit to the shell script
+    st = os.stat(f'cellpose_{s_type}_{s_slide_scene}.sh')
+    os.chmod(f'cellpose_{s_type}_{s_slide_scene}.sh', st.st_mode | stat.S_IEXEC)
+
+def segment_spawner(s_sample,segdir,regdir,nuc_diam=30,cell_diam=30,s_type='nuclei',s_seg_markers="['Ecad']",s_job='short',s_match='both'):
+    '''
+    spawns cellpose segmentation jobs by modifying a python and bash script, saving them and calling with os.system
+    s_job='gpu' or 'long' (default = 'short')
+    s_match= 'seg' or 'match' (default = 'both')
+    '''
+    preprocess.cmif_mkdir([f'{segdir}/{s_sample}Cellpose_Segmentation'])
+    os.chdir(f'{regdir}')
+    for s_file in os.listdir():
+        if s_file.find(s_sample) > -1:
+            os.chdir(f'{regdir}/{s_file}')
+            print(f'Processing {s_file}')
+            df_img = parse_org()
+            for s_scene in sorted(set(df_img.scene)):
+                s_slide_scene= f'{s_sample}-Scene-{s_scene}'
+                s_find = df_img[(df_img.rounds=='R1') & (df_img.color=='c1') & (df_img.scene==s_scene)].index[0]
+                if os.path.exists(f'{regdir}/{s_slide_scene}'):
+                    cellpose_segment_job(s_file,s_slide_scene,s_find,f'{segdir}/{s_sample}Cellpose_Segmentation',f'{regdir}/{s_slide_scene}',nuc_diam,cell_diam,s_type,s_seg_markers,s_job=s_job, s_match=s_match)
+                elif os.path.exists(f'{regdir}/{s_sample}'):
+                    cellpose_segment_job(s_file,s_slide_scene,s_find,f'{segdir}/{s_sample}Cellpose_Segmentation',f'{regdir}/{s_sample}',nuc_diam,cell_diam,s_type,s_seg_markers,s_job=s_job, s_match=s_match)
+                os.chdir(f'{segdir}/{s_sample}Cellpose_Segmentation')
+                os.system(f'sbatch cellpose_{s_type}_{s_slide_scene}.sh')
+                time.sleep(4)
+                print('Next')
+
+def save_seg(processed_list,segdir,s_type='nuclei'):
+    '''
+    save the segmentation basins
+    '''
+
+    for item in processed_list:
+        for newkey,mask in item.items():
+            print(f"saving {newkey.split(' - ')[0]} {s_type} Basins")
+            if s_type=='nuclei':
+                io.imsave(f"{segdir}/{newkey} - Nuclei Segmentation Basins.tif", mask) #Scene 002 - Nuclei Segmentation Basins.tif
+            elif s_type=='cell':
+                io.imsave(f"{segdir}/{newkey} - Cell Segmentation Basins.tif", mask) #Scene 002 - Nuclei Segmentation Basins.tif
+
+def save_img(d_img, segdir,s_type='nuclei',ls_seg_markers=[]):
+    '''
+    save the segmentation basins
+    '''
+    #save dapi or save the cyto projection
+    if s_type=='nuclei':
+        for key,dapi in d_img.items():
+            print('saving DAPI')
+            print(key)
+            io.imsave(f"{segdir}/{key} - DAPI.png",dapi)
+    elif s_type=='cell':
+        for key,zdh in d_img.items():
+            print('saving Cyto Projection')
+            io.imsave(f"{segdir}/{key.split(' - ')[0]} - {'.'.join(ls_seg_markers)}_CytoProj.png",(zdh/255).astype('uint8'))
+
+    else:
+        print('choose nuceli or cell')
+
+# numba functions
+# key and value types of the typed dictionary held by ContainerHolder
+kv_ty = (types.int64, types.int64)
+
+# numba jitclass bundling a typed dict and a typed list, so that they can be
+# passed into nopython functions such as relabel_numba (which reads .d)
+#   d: int64 -> int64 dictionary (filled with old -> new cell label pairs by nuc_to_cell)
+#   l: list of float64; not used by the functions in this file
+@jitclass([('d', types.DictType(*kv_ty)),
+           ('l', types.ListType(types.float64))])
+class ContainerHolder(object):
+    def __init__(self):
+        # initialize the containers
+        self.d = numba.typed.Dict.empty(*kv_ty)
+        self.l = numba.typed.List.empty_list(types.float64)
+
+# numba overload of np.array: when the argument is already a numba array type,
+# np.array(x) inside jitted code is implemented as np.copy(x)
+# (for any other argument type this overload returns None, i.e. provides no implementation)
+@overload(np.array)
+def np_array_ol(x):
+    if isinstance(x, types.Array):
+        def impl(x):
+            return np.copy(x)
+        return impl
+
+# jitted check that np.array can be called on an array argument (see np_array_ol);
+# the result is not returned
+# NOTE(review): the indented string below is parsed as part of this function's body;
+# its text describes relabelling with a container, so it looks like it was meant
+# for relabel_numba - confirm
+@numba.njit
+def test(a):
+    b = np.array(a)
+
+# numba function
+    '''
+    use numba to quickly iterate over each label and replace pixels with new pixel values
+    Input:
+    container = numba container class, with key-value pairs of old-new cell IDs
+    labels: numpy array with labels to rename
+        #cell_labels = np.where(np.array(cell_labels,dtype=np.int64)==key, value, np.array(labels,dtype=np.int64))
+    '''
+
+@jit(nopython=True)
+def relabel_numba(container,cell_labels):
+    '''
+    faster; replace pixels according to dictionary (i.e. numba container)
+    key is original cell label, value is replaced label
+    Input:
+    container: object with a .d mapping of old -> new labels (see ContainerHolder)
+    cell_labels: label array to rename
+    Output:
+    relabelled copy of cell_labels
+    Note: replacements are applied one after another in the iteration order of
+    container.d, so a label produced by an earlier pair is replaced again if it
+    equals a later key.
+    '''
+    # work on a copy of the input (np.array is overloaded as np.copy, see np_array_ol)
+    cell_labels = np.array(cell_labels)
+    for key, value in container.d.items():
+        cell_labels = np.where(cell_labels==key, value, cell_labels)
+    print('done matching')
+    return(cell_labels)
+
+def relabel_numpy(d_replace,cell_labels):
+    '''
+    slow replace pixels accorind to dictionary 
+    key is original cell label, value is replaced label
+    '''
+    #key is original cell albel, value is replaced label
+    for key, value in d_replace.items():
+        cell_labels = np.where(cell_labels==key, value, cell_labels)
+    print('done matching')
+    return(cell_labels)
+
+def relabel_gpu(d_replace,cell_labels):
+    '''
+    not implemented yet
+    key is original cell label, value is replaced label
+    '''
+    #key is original cell albel, value is replaced label
+    for key, value in d_replace.items():
+        cell_labels = np.where(cell_labels==key, value, cell_labels)
+    print('done mathcing')
+    return(cell_labels)
+
+def nuc_to_cell_new(labels,cell_labels):
+    '''
+    problem - still not giving same result as original function
+    associate the largest nucleaus contained in each cell segmentation
+    Input:
+    labels: nuclear labels
+    cell_labels: cell labels that need to be matched
+    Ouput:
+    container: numba container of key-value pairs of old-new cell IDs
+    '''
+    start = time.time()
+    #dominant nuclei
+    props = measure.regionprops_table(cell_labels,labels, properties=(['intensity_image','image','label']))
+    df_prop = pd.DataFrame(props)
+    d_replace = {}
+    for idx in df_prop.index[::-1]:
+        label_id = df_prop.loc[idx,'label']
+        intensity_image = df_prop.loc[idx,'intensity_image']
+        image = df_prop.loc[idx,'image']
+        nuc_labels = intensity_image[image & intensity_image!=0]
+        if len(nuc_labels) == 0:
+            d_replace.update({label_id:0}) 
+        elif len(np.unique(nuc_labels)) == 1:
+            d_replace.update({label_id:nuc_labels[0]})
+        else:
+            new_id = scipy.stats.mode(nuc_labels)[0][0]
+            d_replace.update({label_id:new_id})
+
+    #convert to numba container
+    container = ContainerHolder()
+    for key, value in d_replace.items():
+        container.d[key] = value
+    end = time.time()
+    print(end - start)
+    return(container,d_replace, df_prop) 
+
+def nuc_to_cell(labels,cell_labels):
+    '''
+    associate the largest nucleaus contained in each cell segmentation
+    Input:
+    labels: nuclear labels
+    cell_labels: cell labels that need to be matched
+    Ouput:
+    container: numba container of key-value pairs of old-new cell IDs
+    '''
+    start = time.time()
+    #dominant nuclei
+    d_replace = {}
+    for idx in np.unique(cell_labels)[::-1]:
+        if idx == 0:
+            continue
+        #iterate over each cell label, find all non-zero values contained within that mask
+        cell_array = labels[cell_labels == idx]
+        cell_array =cell_array[cell_array !=0]
+        #for multiple nuclei, choose largest (most common pixels, i.e. mode)
+        if len(np.unique(cell_array)) > 1:
+            new_id = scipy.stats.mode(cell_array, axis=0)[0][0]
+            d_replace.update({idx:new_id})
+        elif len(np.unique(cell_array)) == 1:
+            d_replace.update({idx:cell_array[0]})
+        else:
+            d_replace.update({idx:0})
+    #fix matching bug
+    d_replace = {item[0]:item[1] for item in sorted(d_replace.items(), key=lambda x: x[1], reverse=True)}
+    #convert to numba container
+    container = ContainerHolder()
+    for key, value in d_replace.items():
+        container.d[key] = value
+    end = time.time()
+    print(end - start)
+    return(container,d_replace)
+
+########## OLD ##############
+
+def zero_background(cells_relabel):
+    '''
+    in a labelled cell image, set the background to zero
+    '''
+    mode = stats.mode(cells_relabel,axis=0)[0][0][0]
+    black = cells_relabel.copy()
+    black[black==mode] = 0
+    return(black)
+
+def nuc_to_cell_watershed(labels,cell_labels,i_small=200):
+    '''
+    associate the largest nucleus contained in each cell segmentation
+    Input:
+    labels: nuclear labels
+    cell_labels: cell labels that need to be matched
+    Ouput:
+    new_cell_labels: shrunk so not touching and cleaned of small objects < i_small
+    container: numba container of key-value pairs of old-new cell IDs
+    d_replace: python dictionary of key-value pairs
+    '''
+    #cells
+    cell_boundaries = segmentation.find_boundaries(cell_labels,mode='outer')
+    shrunk_cells = cell_labels.copy()
+    shrunk_cells[cell_boundaries] = 0
+    foreground = shrunk_cells != 0
+    foreground_cleaned = morphology.remove_small_objects(foreground, i_small)
+    background = ~foreground_cleaned
+    shrunk_cells[background] = 0
+    #problem when we filter
+    #new_cell_labels = measure.label(foreground_cleaned, background=0)
+
+    #nuclei
+    cut_labels = labels.copy()
+    background = ~foreground_cleaned
+    cut_labels[background] = 0
+    labels_in = morphology.remove_small_objects(cut_labels, i_small)
+    cleaned_nuclei = labels_in
+    distance = ndi.distance_transform_edt(foreground_cleaned)
+    labels_out = segmentation.watershed(-distance, labels_in, mask=foreground_cleaned)
+
+    #dominant nuclei
+    props = measure.regionprops_table(shrunk_cells,labels_out, properties=('min_intensity','max_intensity','mean_intensity'))
+    df_prop = pd.DataFrame(props)
+    d_replace = {}
+    for idx in df_prop.index[::-1]:
+        #iterate over each cell label, find all non-zero values of watershed expansioncontained within that mask 
+        cell_array = labels_out[shrunk_cells == idx]
+        if len(np.unique(cell_array)) > 1:
+            new_id = scipy.stats.mode(cell_array, axis=0)[0][0]
+            d_replace.update({idx:new_id})
+        elif len(np.unique(cell_array)) == 1:
+            d_replace.update({idx:cell_array[0]})
+        else:
+            d_replace.update({idx:0})
+    #convert to numba container
+    container = ContainerHolder()
+    for key, value in d_replace.items():
+        container.d[key] = value
+
+    return(container)
+
+def save_seg_z(processed_list,segdir,s_type='nuclei'):
+    '''
+    save the segmentation basins
+    '''
+
+    for item in processed_list:
+        for newkey,mask in item.items():
+            print(f"saving {newkey.split(' - Z')[0]} {s_type} Basins")
+            if s_type=='nuclei':
+                io.imsave(f"{segdir}/{newkey} - Nuclei Segmentation Basins.tif", mask) #Scene 002 - Nuclei Segmentation Basins.tif
+            elif s_type=='cell':
+                io.imsave(f"{segdir}/{newkey} - Cell Segmentation Basins.tif", mask) #Scene 002 - Nuclei Segmentation Basins.tif
+
+def cellpose_segment_parallel(d_img,s_type='nuclei'):
+    '''
+    Dont use/ segment nuclei or cell
+    Input:
+    d_img: dictionary of {name: image}
+    s_type: 'nuclei' or 'cell'
+    NOTE(review): Parallel, delayed, nuc_diam and cell_diam are not defined or imported
+    in the visible part of this module, and processed_list is only assigned when
+    __name__ == "__main__", so the final return fails with an unbound variable otherwise.
+    '''
+    if s_type=='nuclei':
+        print('segmenting nuclei')
+        # one job per image
+        if __name__ == "__main__":
+            processed_list = Parallel(n_jobs=len(d_img))(delayed(cellpose_nuc)(key,img,diameter=nuc_diam) for key,img in d_img.items())
+
+    elif s_type=='cell':
+        print('segmenting cells')
+        # one job per image
+        if __name__ == "__main__":
+            processed_list = Parallel(n_jobs=len(d_img))(delayed(cellpose_cell)(key,img,diameter=cell_diam) for key,img in d_img.items())
+
+    else:
+        print('choose nuceli or cell')
+    return(processed_list)
+
+def save_img_z(d_img, segdir,s_type='nuclei',ls_seg_markers=[]):
+    '''
+    save the segmentation basins
+    '''
+    #save dapi or save the cyto projection
+    if s_type=='nuclei':
+        for key,dapi in d_img.items():
+            print('saving DAPI')
+            io.imsave(f"{segdir}/{key}",dapi)
+    elif s_type=='cell':
+        for key,zdh in d_img.items():
+            print('saving Cyto Projection')
+            io.imsave(f"{segdir}/{key.split(' - Z')[0]} - {'.'.join(ls_seg_markers)}_CytoProj.png",(zdh/255).astype('uint8'))
+
+    else:
+        print('choose nuceli or cell')
+
+def cellpose_segment_job_z(s_sample='SampleName',s_scene="SceneName",nuc_diam='20',cell_diam='25',s_type='cell_or_nuclei',s_seg_markers="['Ecad']",s_rare="[]",codedir='PathtoCode'):
+    """
+    makes specific changes to template pyscripts files in Jenny's directories to result in .py file
+    Input:
+
+    """
+    #find template, open ,edit
+    os.chdir(f'{s_src_path}/src')
+    with open('cellpose_template_z.py') as f:
+            s_file = f.read()
+    s_file = s_file.replace('SampleName',s_sample)
+    s_file = s_file.replace('SceneName',s_scene)
+    s_file = s_file.replace('nuc_diam=int',f'nuc_diam={str(nuc_diam)}')
+    s_file = s_file.replace('cell_diam=int',f'cell_diam={str(cell_diam)}')
+    s_file = s_file.replace('cell_or_nuclei',s_type)
+    s_file = s_file.replace("['Ecad']",s_seg_markers)
+    s_file = s_file.replace("ls_rare = []",f"ls_rare = {s_rare}")
+    s_file = s_file.replace('PathtoCode',codedir)
+
+    with open('cellpose_template_z.sh') as f:
+        s_shell = f.read()
+        s_shell = s_shell.replace("PythonScripName",f'cellpose_{s_type}_{s_scene.replace(" ","-").split("_")[0]}.py')
+
+    #save edited .py file
+    os.chdir(f'{codedir}/Segmentation/{s_sample}Cellpose_Segmentation')
+    with open(f'cellpose_{s_type}_{s_scene.replace(" ","-").split("_")[0]}.py', 'w') as f:
+        f.write(s_file)
+
+    with open(f'cellpose_{s_type}_{s_scene.replace(" ","-").split("_")[0]}.sh', 'w') as f:
+        f.write(s_shell)
+
+def load_scene_z(subdir,dapidir,s_sample,s_scene,ls_seg_markers,ls_rare):
+    '''
+    load dapi projection and cell segmentation images
+    '''
+   #image dataframe
+    os.chdir(subdir)
+    df_seg = pd.DataFrame()
+    for s_dir in os.listdir():
+        if s_dir.find(s_sample)>-1:
+            os.chdir(s_dir)
+            df_img = parse_org()
+            df_markers = df_img[df_img.marker.isin(ls_seg_markers)]
+            df_markers['path'] = [f'{subdir}/{s_dir}/{item}' for item in df_markers.index]
+            os.chdir('..')
+            df_seg = df_seg.append(df_markers)
+
+    #load z_projection DAPIs
+    os.chdir(dapidir)
+    d_dapi = {}
+    d_cyto = {}
+    for s_file in sorted(os.listdir()):
+        #print(s_file)
+        if s_file.find(f'{s_scene} - ZProjectionDAPI.png')>-1:
+            dapi = io.imread(s_file)
+            dapi_scale = skimage.exposure.rescale_intensity(dapi,in_range=(np.quantile(dapi,0.03),1.5*np.quantile(dapi,0.9999)))
+            d_dapi.update({s_file:dapi_scale})
+            s_scene = s_scene.split(' ')[1].split('_')[0]
+            print(s_scene)
+            imgs = []
+            #images
+            df_common = df_seg[(df_seg.scene==s_scene) & (~df_markers.marker.isin(ls_rare))]
+            df_rare =  df_seg[(df_seg.scene==s_scene) & (df_markers.marker.isin(ls_rare))]
+            for s_path in df_common.path:
+                img = io.imread(s_path)
+                img_scale = skimage.exposure.rescale_intensity(img,in_range=(np.quantile(img,0.03),1.5*np.quantile(img,0.9999)))
+                imgs.append(img_scale)
+            for s_path in df_rare.path:
+                img = io.imread(s_path)
+                img_scale = skimage.exposure.rescale_intensity(img,in_range=(np.quantile(img,0.03),1.5*np.quantile(img,0.999999)))
+                imgs.append(img_scale)
+            mip = np.stack(imgs).max(axis=0)
+            zdh = np.dstack((np.zeros(mip.shape),mip,dapi)).astype('uint16')
+            d_cyto.update({s_file:zdh})
+    print(f'Number of images = {len(d_dapi)} dapi projections ({len(d_cyto)} cytoplasm projections) ')
+
+    return(d_dapi,d_cyto)
+
+#test code
+'''
+import napari
+#os.chdir('./Desktop/BR1506')
+labels = io.imread('Scene 059 nuclei20 - Nuclei Segmentation Basins.tif')
+cell_labels = io.imread('Scene 059 cell25 - Cell Segmentation Basins.tif')
+cyto_img = io.imread('Scene 059 - CytoProj.png')
+dapi_img = io.imread('Scene 059 - ZProjectionDAPI.png')
+viewer = napari.Viewer()
+viewer.add_labels(labels,blending='additive')
+viewer.add_labels(cell_labels,blending='additive')
+viewer.add_image(cyto_img,blending='additive')
+viewer.add_image(dapi_img,blending='additive',colormap='blue')
+#cell_boundaries = segmentation.find_boundaries(cell_labels,mode='outer')
+#viewer.add_labels(cell_boundaries,blending='additive')
+#nuclear_boundaries = segmentation.find_boundaries(labels,mode='outer')
+#viewer.add_labels(nuclear_boundaries,blending='additive',num_colors=2)
+closing = skimage.morphology.closing(cell_labels)
+viewer.add_labels(closing,blending='additive')
+container = nuc_to_cell(labels,closing)#cell_labels)
+
+#matched cell labels
+cells_relabel = relabel_numba(container[0],closing)
+#remove background
+mode = stats.mode(cells_relabel,axis=0)[0][0][0]
+black = cells_relabel.copy()
+black[black==mode] = 0
+viewer.add_labels(black,blending='additive')
+cell_boundaries = segmentation.find_boundaries(cells_relabel,mode='outer')
+viewer.add_labels(cell_boundaries,blending='additive')
+#ring
+overlap = black==labels
+viewer.add_labels(overlap, blending='additive')
+#cytoplasm
+ring_rep = black.copy()
+ring_rep[overlap] = 0
+viewer.add_labels(ring_rep, blending='additive')
+#membrane
+rim_labels = contract_membrane(black)
+viewer.add_labels(rim_labels, blending='additive')
+
+#expanded nucleus
+__,__,peri_nuc = expand_nuc(labels,distance=3)
+viewer.add_labels(peri_nuc, blending='additive')
+'''
\ No newline at end of file
diff --git a/mplex_image/visualize.py b/mplex_image/visualize.py
new file mode 100755
index 0000000..3cbdf35
--- /dev/null
+++ b/mplex_image/visualize.py
@@ -0,0 +1,387 @@
+####
+# title: visualize.py
+#
+# language: Python3.6
+# date: 2019-05-00
+# license: GPL>=v3
+# author: Jenny
+#
+# description:
+#   python3 library to visualize cyclic data and analysis
+####
+
+#load libraries
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import os
+import skimage
+from skimage import io, segmentation
+import tifffile
+import copy
+import napari
+import seaborn as sns
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import scale
+import random
+import copy
+from scipy.ndimage import distance_transform_edt
+
+#napari
+def load_crops(viewer,s_crop,s_tissue):
+    '''
+    add every channel of the ome.tif files in the working directory whose names contain s_tissue and s_crop
+    to the napari viewer (hidden, additive blending, named from the OME-XML channel names; channels past the
+    listed colors are gray), and add a matching 'SegmentationBasins' image as labels plus its outer boundaries.
+    returns the last label image read, or an empty array if no segmentation file matched
+    '''
+    ls_color = ['blue'] + ['green','yellow','red','cyan','magenta','gray']*3 + ['gray']*7
+    print(s_crop)
+    label_image = np.array([]) #fix: was unbound when nothing matched, and was reset by any other matching file
+    for s_file in os.listdir():
+        if s_file.find(s_tissue) == -1 or s_file.find(s_crop) == -1: continue
+        if s_file.find('ome.tif') > -1:
+            with tifffile.TiffFile(s_file) as tif:
+                array, omexml_string = tif.asarray(), tif.ome_metadata
+                for idx, img in enumerate(array):
+                    s_tag = f'Channel ID="Channel:0:{idx}" Name="'
+                    i_begin = omexml_string.find(s_tag)
+                    #fix: slice after the whole tag (offset 31 kept a leading quote for idx >= 10); a missing tag, which the original only caught when the sliced text began with 'utf-8' (presumably the XML header), gives DAPI1
+                    s_marker = 'DAPI1' if i_begin == -1 else omexml_string[i_begin + len(s_tag):].split('" SamplesPerPixel')[0]
+                    print(s_marker)
+                    viewer.add_image(img,name=s_marker,rgb=False,visible=False,blending='additive',colormap=ls_color[min(idx,len(ls_color)-1)],contrast_limits = (np.quantile(img,0),(np.quantile(img,0.9999)+1)*1.5))
+        elif s_file.find('SegmentationBasins') > -1:
+            label_image = io.imread(s_file)
+            viewer.add_labels(label_image, name='cell_seg',blending='additive',visible=False)
+            cell_boundaries = segmentation.find_boundaries(label_image,mode='outer')
+            viewer.add_labels(cell_boundaries,blending='additive',visible=False)
+        else:
+            print('')
+    return(label_image)
+
+def load_marker(viewer,s_crop,s_tissue,ls_marker=[]):
+    '''
+    read the ome.tif files in the working directory whose names contain s_tissue and s_crop and add each channel named
+    in ls_marker to the napari viewer once (visible, additive, colored by position in ls_marker, gray past the listed colors).
+    returns (d_result, label_image): {channel name: image} of the last ome.tif read and the last matching
+    'SegmentationBasins' image; {} and an empty array when no such file matched. ls_marker is not modified
+    '''
+    ls_color = ['blue','green','yellow','red','cyan','magenta','gray','green','yellow','red','cyan','magenta'] + ['gray']*8
+    print(s_crop)
+    ls_marker_all = copy.copy(ls_marker)
+    d_result, label_image = {}, np.array([]) #fix: both were unbound at the return when no such file matched
+    for s_file in os.listdir():
+        if s_file.find(s_tissue) == -1 or s_file.find(s_crop) == -1: continue
+        if s_file.find('ome.tif') > -1:
+            with tifffile.TiffFile(s_file) as tif:
+                array, omexml_string = tif.asarray(), tif.ome_metadata
+                d_result = {}
+                for idx, img in enumerate(array):
+                    s_tag = f'Channel ID="Channel:0:{idx}" Name="'
+                    i_begin = omexml_string.find(s_tag)
+                    #fix: slice after the whole tag (offset 31 kept a leading quote for idx >= 10); a missing tag, which the original only caught when the sliced text began with 'utf-8' (presumably the XML header), gives DAPI1
+                    s_marker_idx = 'DAPI1' if i_begin == -1 else omexml_string[i_begin + len(s_tag):].split('" SamplesPerPixel')[0]
+                    d_result[s_marker_idx] = img
+                for idxs, s_marker in enumerate(ls_marker):
+                    if s_marker in d_result and s_marker in ls_marker_all:
+                        img = d_result[s_marker]
+                        viewer.add_image(img,name=s_marker,rgb=False,visible=True,blending='additive',colormap=ls_color[min(idxs,len(ls_color)-1)],contrast_limits = (np.quantile(img,0),(np.quantile(img,0.9999)+1)*1.5))
+                        ls_marker_all.remove(s_marker)
+        elif s_file.find('SegmentationBasins') > -1:
+            label_image = io.imread(s_file)
+        else: print('')
+    return(d_result,label_image)
+
+def pos_label(viewer,df_pos,label_image,s_cell):
+    '''
+    show only the cells positive for s_cell: in a copy of label_image every label that is not a positive
+    cell is set to 0; the copy is added to viewer as hidden layer '<text of s_cell before _>_seg' and returned.
+    df_pos = boolean dataframe with index entries ending in '_cell<number>', s_cell = column of df_pos
+    '''
+    def to_label(s_index): #'..._cell12' -> 12
+        return(int(s_index.split('_')[-1].split('cell')[1]))
+    a_seg = copy.deepcopy(label_image)
+    for ls_keep in (df_pos.index, df_pos[df_pos.loc[:,s_cell]==True].index):
+        a_seg[np.isin(a_seg, [to_label(s_index) for s_index in ls_keep], invert=True)] = 0
+    viewer.add_labels(a_seg, name=f'{s_cell.split("_")[0]}_seg',blending='additive',visible=False)
+    return(a_seg)
+
+def expand_labels(label_image, distance=1):
+    """Grow every labeled region outwards by at most ``distance`` pixels.
+
+    Each background (zero) pixel whose Euclidean distance to the nearest labeled
+    pixel is <= ``distance`` receives the label found at that nearest pixel, as
+    located by ``scipy.ndimage.distance_transform_edt``. Only background pixels
+    are filled, so a region never grows into a neighboring region, and labeled
+    pixels keep their value for any ``distance`` >= 0. Label arrays of any
+    dimension are supported.
+
+    Parameters
+    ----------
+    label_image : ndarray of dtype int
+        Label image; 0 marks the background.
+    distance : float
+        Euclidean distance in pixels by which to grow the labels. Default is one.
+
+    Returns
+    -------
+    enlarged_labels : ndarray of dtype int
+        New array with the shape and dtype of ``label_image`` in which all
+        labeled regions have been enlarged.
+    """
+    # distance of every pixel to the nearest labeled pixel (0 on the labels
+    # themselves) and, stacked along the first axis, that pixel's coordinates
+    edt, nearest_coords = distance_transform_edt(
+        label_image == 0, return_indices=True
+    )
+    reachable = edt <= distance
+    # per axis, the coordinate of the source pixel of every reachable pixel
+    source = tuple(
+        axis_coords[reachable] for axis_coords in nearest_coords
+    )
+    enlarged = np.zeros_like(label_image)
+    # copy the label found at each source pixel onto the reachable pixel
+    enlarged[reachable] = label_image[source]
+    return enlarged
+
+def pos_boundary(viewer,df_pos,label_image,s_cell,seed=0.82,s_type='thick'):
+    '''
+    outline the cells positive for s_cell: in a copy of label_image every label that is not a positive cell
+    is set to 0, and the 'thick' boundaries of the rest are added to viewer as hidden layer '<s_cell>_seg'.
+    with s_type='thick' the boundaries of the labels expanded by 2 pixels are added to that layer as well.
+    df_pos = boolean dataframe with index entries ending in '_cell<number>', s_cell = column of df_pos
+    returns (boundaries, expanded boundaries); the second is [] unless s_type='thick'
+    '''
+    a_seg = copy.deepcopy(label_image)
+    for ls_keep in (df_pos.index, df_pos[df_pos.loc[:,s_cell]==True].index):
+        li_keep = [int(s_index.split('_')[-1].split('cell')[1]) for s_index in ls_keep]
+        a_seg[np.isin(a_seg, li_keep, invert=True)] = 0
+    a_edge = segmentation.find_boundaries(a_seg,mode='thick')
+    a_edge_big, a_layer = [], a_edge
+    if s_type == 'thick':
+        a_edge_big = segmentation.find_boundaries(expand_labels(a_seg, distance=2),mode='thick')
+        a_layer = a_edge + a_edge_big
+    viewer.add_labels(a_layer, name=f'{s_cell}_seg',blending='additive',visible=False,seed=seed)
+    return(a_edge, a_edge_big)
+
+#jupyter notebook
+#load manual thresholds
+def new_thresh_csv(df_mi,d_combos):
+    '''make an empty threshold table: one row for 'global' and each scene in df_mi.slide_scene, one '' column per marker in d_combos'''
+    ls_row = ['global'] + sorted(set(df_mi.slide_scene))
+    df_man = pd.DataFrame(index=ls_row)
+    for s_marker in [s_item for es_marker in d_combos.values() for s_item in sorted(es_marker)]:
+        df_man[s_marker] = ''
+    return(df_man)
+
+def load_thresh_csv(s_sample):
+    '''
+    read thresh_JE_<s_sample>.csv (rows: 'global' plus one per scene, columns: markers) and return a dataframe
+    indexed by scene with a <marker>_global and a <marker>_local column per marker. every value is multiplied
+    by 256 (the conversion to 16 bit) and thresholds equal to 0 are replaced by 12
+    '''
+    df_man = pd.read_csv(f'thresh_JE_{s_sample}.csv',header=0,index_col = 0)
+    ls_scene = df_man.index.tolist()
+    ls_scene.remove('global')
+    df_thresh = pd.DataFrame(index = ls_scene)
+    for s_marker in df_man.columns.tolist():
+        df_thresh[f'{s_marker}_global'] = df_man.loc['global',s_marker]*256
+        df_thresh[f'{s_marker}_local'] = df_man.loc[df_man.index!='global',s_marker]*256
+    return(df_thresh.replace(to_replace=0, value = 12))
+
+def threshold_postive(df_thresh,df_mi):
+    '''
+    make positive dataframe to check thresholds: start with local, and if it is not there, insert the global threshold.
+    df_thresh = scenes x <marker>_local / <marker>_global thresholds (NaN = not set)
+    df_mi = cells x float64 columns named <marker>_<location>, plus a slide_scene column
+    returns (d_thresh_record, df_pos): {'<scene>_<marker>': threshold used, nan if none} and the boolean calls per cell
+    note, this will break if there are two biomarker locations
+    '''
+    ls_scene = sorted(df_thresh.index.tolist())
+    ls_sub = df_mi.columns[df_mi.dtypes=='float64'].tolist()
+    es_thresh = {item.split('_')[0] for item in df_thresh.columns} #only threshold markers in .csv
+    ls_other, ls_df_scene, d_thresh_record = [], [], {}
+    for s_scene in ls_scene:
+        ls_index = df_mi[df_mi.slide_scene==s_scene].index
+        df_scene = pd.DataFrame(index=ls_index)
+        for s_marker_loc in ls_sub:
+            s_marker = s_marker_loc.split('_')[0]
+            if s_marker not in es_thresh:
+                ls_other.append(s_marker)
+                continue
+            i_thresh = np.nan #fix: the np.NaN alias was removed in numpy 2.0
+            for s_level in ('local','global'):
+                if not pd.isna(df_thresh.loc[s_scene,f'{s_marker}_{s_level}']):
+                    i_thresh = df_thresh.loc[s_scene,f'{s_marker}_{s_level}']
+                    #column assignment aligns on the shared index and gives a plain boolean column
+                    df_scene[s_marker_loc] = df_mi.loc[ls_index,s_marker_loc] >= i_thresh
+                    break
+            else:
+                ls_other.append(s_marker) #neither a local nor a global threshold is set
+            d_thresh_record.update({f'{s_scene}_{s_marker}':i_thresh})
+        ls_df_scene.append(df_scene)
+    #fix: DataFrame.append was removed in pandas 2.0, so the scenes are concatenated once
+    df_pos = pd.concat(ls_df_scene) if ls_df_scene else pd.DataFrame()
+    print(f'Did not threshold {set(ls_other)}')
+    return(d_thresh_record,df_pos)
+
+def plot_positive(s_type,d_combos,df_pos,d_thresh_record,df_xy,b_save=True):
+    '''
+    per scene in df_xy, draw one panel per column of df_pos whose marker (text before '_') is in d_combos[s_type]:
+    all cells of the scene in silver, positive cells in dark blue, threshold and positive count in the title.
+    df_xy needs slide_scene, DAPI_X and DAPI_Y columns; d_thresh_record maps '<scene>_<marker>' to the threshold.
+    returns the list of figures; with b_save each is also written to ./SpatialPlots/<scene>_<s_type>_manual.png
+    '''
+    es_marker = set(d_combos[s_type])
+    ls_color = [item for item in df_pos.columns if item.split('_')[0] in es_marker]
+    ls_scene = sorted(set(df_xy.slide_scene))
+    ls_fig = []
+    if b_save: os.makedirs('./SpatialPlots',exist_ok=True) #fix: savefig raised when the folder was missing
+    for s_scene in ls_scene:
+        #negative cells = all cells even before dapi filtering
+        df_neg = df_xy[(df_xy.slide_scene==s_scene)]
+        fig, ax = plt.subplots(2, ((len(ls_color))+1)//2, figsize=(18,12))
+        ax = ax.ravel()
+        for ax_num, s_color in enumerate(ls_color):
+            s_marker = s_color.split('_')[0]
+            s_min = d_thresh_record[f"{s_scene}_{s_marker}"]
+            #positive cells based on threshold; fix: ==True also tolerates NaN where a scene had no threshold
+            df_color_pos = df_neg[df_neg.index.isin(df_pos.index[df_pos.loc[:,s_color]==True])]
+            if len(df_color_pos)>=1:
+                ax[ax_num].scatter(data=df_neg,x='DAPI_X',y='DAPI_Y',color='silver',s=1)
+                ax[ax_num].scatter(data=df_color_pos, x='DAPI_X',y='DAPI_Y',color='DarkBlue',s=.5)
+                ax[ax_num].axis('equal')
+                ax[ax_num].set_ylim(ax[ax_num].get_ylim()[::-1])
+                ax[ax_num].set_title(f'{s_marker} min={int(s_min)} ({len(df_color_pos)} cells)')
+            else:
+                ax[ax_num].set_title(f'{s_marker} min={(s_min)} ({(0)} cells)') #fix: closing parenthesis was missing
+        fig.suptitle(s_scene)
+        ls_fig.append(fig)
+        if b_save: fig.savefig(f'./SpatialPlots/{s_scene}_{s_type}_manual.png')
+    return(ls_fig)
+
+#gating analysis
+def prop_positive(df_data,s_cell,s_grouper):
+    '''fraction of the cells in each s_grouper group (rows) that fall in each s_cell category (columns); df_data needs a countme column and rows with NaN in any of the three columns are dropped'''
+    df_cell = df_data.loc[:,[s_cell,s_grouper,'countme']].dropna()
+    se_total = df_cell.groupby([s_grouper]).countme.count()
+    return((df_cell.groupby([s_cell,s_grouper]).countme.count()/se_total).unstack().T)
+
+def prop_clustermap(df_prop,df_annot,i_thresh,lut,figsize=(10,5)):
+    '''
+    clustermap (viridis) of df_prop with NaN set to 0; each row is colored by lut[df_annot.loc[row,'ID']].
+    with i_thresh > 0 only the columns whose sum divided by the number of rows exceeds i_thresh are kept.
+    returns (g, df_plot_less): the seaborn clustermap and the dataframe that was plotted
+    '''
+    #row colors straight from the annotation, without the temporary ID column on df_prop
+    row_colors = df_annot.loc[df_prop.index,'ID'].map(lut)
+    #clustermap plot without the low values - drop less than i_thresh of total
+    df_plot_less = df_prop.fillna(0)
+    if i_thresh > 0:
+        df_plot_less = df_plot_less.loc[:,df_plot_less.sum()/len(df_plot_less) > i_thresh]
+    #fix: df_plot_less was only assigned inside the if, so i_thresh <= 0 raised UnboundLocalError
+    g = sns.clustermap(df_plot_less,figsize=figsize,cmap='viridis',row_colors=row_colors)
+    return(g,df_plot_less)
+
+def prop_barplot(df_plot_less,s_cell,colormap="Spectral",figsize=(10,5),b_sort=True):
+    '''
+    horizontal stacked bar plot of df_plot_less: one bar per row, one segment per column, colored by colormap,
+    titled s_cell. with b_sort the rows are first sorted by index in descending order. returns the figure
+    '''
+    fig,ax = plt.subplots(figsize=figsize)
+    df_bar = df_plot_less.sort_index(ascending=False) if b_sort else df_plot_less
+    df_bar.plot(kind='barh',stacked=True,width=.9, ax=ax,colormap=colormap)
+    ax.set(title=s_cell, xlabel='Fraction Positive')
+    ax.legend(bbox_to_anchor=(1.01, 1))
+    plt.tight_layout()
+    return(fig)
+
+def plot_color_leg(lut,figsize = (2.3,3)):
+    '''
+    draw a color legend for lut = {subtype: color}: one horizontal bar of equal length per subtype,
+    in the order of lut, filled with its color and labelled on the y axis. returns the figure
+    '''
+    #colors
+    series = pd.Series(lut)
+    #fix: removed series.sort_values(); its result was discarded, so it never affected the order
+    df_color = pd.DataFrame(index=range(len(series)),columns=['subtype','color'])
+    df_color['subtype'] = series.index
+    df_color['value'] = 1
+    df_color['color'] = series.values
+    fig,ax = plt.subplots(figsize = figsize,dpi=100)
+    df_color.plot(kind='barh',x='subtype',y='value',width=1,legend=False,color=df_color.color,ax=ax)
+    ax.set(xticks=[], ylabel='', title='subtype')
+    plt.tight_layout()
+    return(fig)
+
+#cluster analysis
+
+def cluster_kmeans(df_mi,ls_columns,k,b_sil=False):
+    '''
+    log2 transform, zscore and kmeans cluster.
+    df_mi.loc[:,ls_columns] is transformed to log2(x + 1), scaled with sklearn scale() and clustered with
+    KMeans(n_clusters=k, random_state=0); the per-cluster means are drawn as a clustermap.
+    returns (g, df_cluster, score): the clustermap, the log2 data (columns renamed to the text before the
+    first '_') with the cluster labels in column K<k>, and the silhouette score (nan unless b_sil).
+    df_mi itself is not modified
+    '''
+    df_cluster = np.log2(df_mi.loc[:,ls_columns] + 1)
+    #figure width follows the number of clustered columns, height the number of clusters
+    i_width = len(df_cluster.columns)
+    #scale data
+    df_scale = scale(df_cluster)
+    #kmeans cluster
+    kmeans = KMeans(n_clusters=k, random_state=0).fit(df_scale)
+    df_cluster.columns = [item.split('_')[0] for item in df_cluster.columns]
+    df_cluster[f'K{k}'] = list(kmeans.labels_)
+    g = sns.clustermap(df_cluster.groupby(f'K{k}').mean(),cmap="RdYlGn_r",z_score=1,figsize=(3+i_width/3,3+k/3))
+    score = np.nan
+    if b_sil:
+        #fix: silhouette_score was never imported in this module, so b_sil=True raised NameError
+        from sklearn.metrics import silhouette_score
+        score = silhouette_score(X = df_scale, labels=list(kmeans.labels_))
+    return(g,df_cluster,score)
+
+def plot_clusters(df_cluster,df_xy,s_num='many'):
+    '''
+    per scene, plot the cells of each cluster (dark blue) over all cells of the scene (silver), one panel per cluster.
+    df_cluster = cells x columns incl. slide_scene; its first int64 column holds the cluster labels.
+    df_xy = cells x slide_scene, DAPI_X, DAPI_Y. s_num='many' uses a 3-row grid, anything else one column of panels.
+    returns d_fig = {scene: figure}
+    '''
+    s_type = df_cluster.columns[df_cluster.dtypes=='int64'][0]
+    print(s_type)
+    ls_scene = sorted(set(df_cluster.slide_scene))
+    ls_color = sorted(set(df_cluster.loc[:,s_type].dropna()))
+    d_fig = {}
+    for s_scene in ls_scene:
+        #negative cells = all cells even before dapi filtering
+        df_neg = df_xy[(df_xy.slide_scene==s_scene)]
+        if s_num == 'many':
+            fig, ax = plt.subplots(3, ((len(ls_color))+2)//3, figsize=(18,12),dpi=200)
+        else: #fix: the fixed 2 rows raised IndexError as soon as there were more than 2 clusters
+            fig, ax = plt.subplots(max(2,len(ls_color)), 1, figsize=(7,4),dpi=200)
+        ax = ax.ravel()
+        for ax_num, s_color in enumerate(ls_color):
+            #positive cells = cells assigned to this cluster
+            ls_pos_index = (df_cluster[df_cluster.loc[:,s_type]==s_color]).index
+            df_color_pos = df_neg[df_neg.index.isin(ls_pos_index)]
+            if len(df_color_pos)>=1:
+                ax[ax_num].scatter(data=df_neg,x='DAPI_X',y='DAPI_Y',color='silver',s=1)
+                ax[ax_num].scatter(data=df_color_pos, x='DAPI_X',y='DAPI_Y',color='DarkBlue',s=.5)
+                ax[ax_num].axis('equal')
+                ax[ax_num].set_ylim(ax[ax_num].get_ylim()[::-1])
+                if s_num == 'many':
+                    ax[ax_num].set_xticklabels('')
+                    ax[ax_num].set_yticklabels('')
+                else:
+                    ax[0].set_xticklabels('')
+                ax[ax_num].set_title(f'{s_color} ({len(df_color_pos)} cells)')
+            else:
+                ax[ax_num].set_xticklabels('')
+                ax[ax_num].set_yticklabels('')
+                ax[ax_num].set_title(f'{s_color}  ({(0)} cells)') #fix: closing parenthesis was missing
+        fig.suptitle(s_scene)
+        d_fig.update({s_scene:fig})
+    return(d_fig)