The scripts are used to deal with data in my daily work
- Script for daily routine
- Merging metaphlan
- Merging humann
- Calculating md5sum of csv file
- Generating phyloseq object in BJ GI cancer
- Summarizing accounts of patients and samples in BJ GI cancer
- Plotting swimming plot in in BJ GI cancer
- Obtaining Reads' status from metaphlan pipeline
- phyloseq for BJ GI for three types' dataset
- Integrating phyloseq object per taxa levels into list object
- Summarizing accounts of patients and samples in BJ GI cancer (version 2)
- checking whether the sra files were successfully downloaded
- converting sra into fastq
- scp file between local and remote
- rsync files between source and destination directory
- batch run for scripts
- APAlyzer Expression
# python merge_metaphlan.py \
# -f dataset/metaphlan/metaphlan_test_filepath.tsv \
# -p merge_metaphlan \
# -t s__|t__ \
# -o result
python merge_metaphlan.py \
-f dataset/metaphlan/metaphlan_test_filepath.tsv \
-p merge_metaphlan \
-t species \
-o result
python merge_humann.py \
-f dataset/humann/Humann2_pathabundance_test_filepath.tsv \
-p merge_humann \
-d humann2 \
-o result
Rscript calculate_file_md5sum.R \
-f dataset/phenotype \
-p csv \
-o result
Rscript generate_phyloseq_for_BJ.R \
-m dataset/phenotype/bjch_sample_sequence_metadata_20220923.csv \
-p dataset/metaphlan/metaphlan2_BJ_RoundB_merge.csv \
-r RoundB \
-t metaphlan \
-n metaphlan2 \
-o result
Rscript Summarize_Patients_Samples.R \
-a dataset/phyloseq/metaphlan2_BJ_all_ps.RDS \
-p dataset/phyloseq/metaphlan2_BJ_all_ps_baseline_R6.RDS \
-o result
Rscript BJ_GI_Swimming_plot.R \
-p dataset/phenotype/bjch_sample_sequence_metadata_20220923.csv \
-c colon_cancer \
-n Swimming_Plot \
-w 10 \
-g 20 \
-o result
# fastp & kneaddata
python obtain_metaphlan_ReadsQC.py \
-f dataset/pipeline_result/BJ_RoundG \
-p BJ_RoundG_ReadsQC \
-o result
# only kneaddata
python obtain_metaphlan_ReadsQC.py \
-f dataset/pipeline_result/BJ_RoundB \
-p BJ_RoundB_ReadsQC \
-o result
Rscript generate_phyloseq_for_BJ_v2.R \
-m dataset/phenotype/bjch_sample_sequence_metadata_20221008.csv \
-p dataset/profile/metaphlan2_BJ_RoundB-G_merge.csv \
-r all \
-t metaphlan \
-n metaphlan2 \
-o result
Rscript integrate_taxa_phyloseq_for_BJ.R \
-p ./dataset/phyloseq/metaphlan2_phylum_BJ_all_ps_total.RDS \
-c ./dataset/phyloseq/metaphlan2_class_BJ_all_ps_total.RDS \
-o ./dataset/phyloseq/metaphlan2_order_BJ_all_ps_total.RDS \
-f ./dataset/phyloseq/metaphlan2_family_BJ_all_ps_total.RDS \
-g ./dataset/phyloseq/metaphlan2_genus_BJ_all_ps_total.RDS \
-s ./dataset/phyloseq/metaphlan2_species_BJ_all_ps_total.RDS \
-n total \
-d ./result/
Rscript Summarize_Patients_Samples_v2.R \
-a dataset/phyloseq/metaphlan2_BJ_all_metadata_total.csv \
-b dataset/phyloseq/metaphlan2_phylum_BJ_all_ps_filter.RDS \
-c dataset/phyloseq/metaphlan2_phylum_BJ_all_ps_baseR6.RDS \
-d dataset/phyloseq/16s_BJ_all_metadata_total.csv \
-e dataset/phyloseq/16s_BJ_all_metadata_filter.csv \
-f dataset/phyloseq/16s_BJ_all_metadata_baseR6.csv \
-n Summarize_Patients_Samples \
-o result
perl get_update_sra_NCBI.pl \
-f SRR_Acc_List.txt \
-d /share/projects/Analytics/IO/zouhua/pipeline/GSE164150/SRA/sra \
-o SRR_Acc_List_redownload.txt
prefetch --option-file SRR_Acc_List_redownload.txt
find /share/projects/Analytics/IO/zouhua/pipeline/GSE164150/SRA/sra/ -name "*.sra" | \
perl -e 'print"SampleID\tMode\tPath\n"; while(<>){chomp; $name=(split("\/", $_))[-1]; $name=~s/.sra//g; $type = "PAIRED"; print "$name\t$type\t$_\n";}' \
> samples.path.tsv
perl sra2fq.pl \
-f samples.path.tsv \
-d fastq \
-p sra_into_fastq
sh sra_into_fastq.sh
perl get_each_scp.pl \
-f filepath.tsv \
-s xbiome_102 \
-d /share/projects/Analytics/IO/zouhua/database/ \
-o scp_file
python rsync.py \
-s /share/projects/Analytics/IO/zouhua/pipeline/GSE164150/SRA/sra/ \
-d /share/projects/Analytics/IO/zouhua/database \
-p sra \
-l sra_log.tsv
python run_batch_shell.py \
-m sra_into_fastq.sh \
-o sra_into_fastq_batch.sh
Rscript APAlyzer_Expression.R \
-b bam_file.tsv \
-r RData \
-g mm9_REF.RData \
-c chr19 \
-e 3UTR \
-o result