diff --git a/README.md b/README.md
index 00cbefb..c76e93e 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,28 @@ RNASeq tools
 
 A collection of tools for analysis of RNA-seq and transcriptomic data used by the [Hibberd Lab](http://hibberdlab.github.io).
 
-## Batch read processing scripts
+### setup
+
+Scripts for organising raw data, for example by processing data downloaded from sequencing services to concatenate and name files by sample. 
+
+* **prepare_samples_TGAC.rb** - parses the TGAC SampleAlias.txt file and concatenates and renames gzipped FASTQ files by sample name.
+
+### preprocess
 
 * **trim-batch** - run trimmomatic on a series of FASTQ read files, optionally trimming paired and single reads in the same run. After quality analysis, this is the first step in an RNASeq pipeline.
+  - todo:
+    - run multiple trimmomatic processes in parallel
 * **khmer-batch** - run digital normalisation on a series of FASTQ read files, preserving the kmer counting hash between runs, to create a single normalised read dataset. Useful for incorporating a new read dataset with old data to generate an improved *de-novo* assembly.
+  - todo:
+    - add option to use filter-by-abund
+
+### expression
 
+* **express_sample.rb** - run eXpress on each replicate of a sample, collating results into a single CSV.
+* **sailfish_sample.rb** - run Sailfish on each replicate of a sample, collating results into a single CSV.
+* **EBSeq_experiment.R** - run differential expression analysis using EBSeq.
+* **GO_analyse.R** - run GO term enrichment analysis.
+* **plot_GO_analysis.R** - generate plots of GO term analysis, including a representation of relative enrichment between samples, and which genes are important in each category.
 
 ## License
 
diff --git a/express_sample.rb b/expression/express_sample.rb
similarity index 100%
rename from express_sample.rb
rename to expression/express_sample.rb
diff --git a/prep_and_trim.sh b/prep_and_trim.sh
new file mode 100755
index 0000000..339e05b
--- /dev/null
+++ b/prep_and_trim.sh
@@ -0,0 +1,5 @@
+prepare_samples_TGAC.rb | tee sample_preparation.log  # step 1: organise raw FASTQ files by sample; tee shows output and saves it to the log
+trim-batch.rb --singlefile files_for_trimming.txt \
+--jar /home/rds45/apps/Trimmomatic-0.30/trimmomatic-0.30.jar \
+--adapters /data/adapters/adapters_list.fa \
+--cleanup | tee quality_adapter_trimming.log  # step 2: Trimmomatic trimming run via trim-batch; output is likewise shown and logged
diff --git a/generate_plots.R b/preprocess/generate_plots.R
old mode 100644
new mode 100755
similarity index 100%
rename from generate_plots.R
rename to preprocess/generate_plots.R
diff --git a/khmer-batch.rb b/preprocess/khmer-batch.rb
similarity index 100%
rename from khmer-batch.rb
rename to preprocess/khmer-batch.rb
diff --git a/rrna_filter.rb b/preprocess/rrna_filter.rb
similarity index 100%
rename from rrna_filter.rb
rename to preprocess/rrna_filter.rb
diff --git a/summarise_fastqc.rb b/preprocess/summarise_fastqc.rb
similarity index 100%
rename from summarise_fastqc.rb
rename to preprocess/summarise_fastqc.rb
diff --git a/trim-batch.rb b/preprocess/trim-batch.rb
similarity index 98%
rename from trim-batch.rb
rename to preprocess/trim-batch.rb
index 09ea90c..7016772 100755
--- a/trim-batch.rb
+++ b/preprocess/trim-batch.rb
@@ -108,8 +108,8 @@ def check_list(inlist, outlist)
   cmd = pairedcmd
   cmd = cmd.gsub(/INFILEF/, infilef)
   cmd = cmd.gsub(/INFILER/, infiler)
-  inpathl = File.dirname(infilef)
-  infilel = File.basename(infilef)
+  inpathf = File.dirname(infilef)
+  infilef = File.basename(infilef)
   inpathr = File.dirname(infiler)
   infiler = File.basename(infiler)
   cmd = cmd.gsub(/OUTFILEF/, "#{inpathf}/#{TRIMPREFIX}#{infilef}")
@@ -148,7 +148,7 @@ def check_list(inlist, outlist)
     logline['file'] = infile
     unpaired_trimlog << logline
   end
-  File.delete infile if opts.cleanup
+  # File.delete infile if opts.cleanup
 end
 
 datestr = Time.now.strftime('%d_%m_%Y_%H_%M_%S')
diff --git a/prepare_samples_TGAC.rb b/setup/prepare_samples_TGAC.rb
similarity index 100%
rename from prepare_samples_TGAC.rb
rename to setup/prepare_samples_TGAC.rb