-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03combingTaxonomyandSeqs20220407.pl
113 lines (110 loc) · 2.82 KB
/
03combingTaxonomyandSeqs20220407.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/perl -w
use strict;
##
##
## combining metadata.tsv and feature-table.tsv
## to make a new table with seq, taxonomy hierarchy and frequency table
##
##
## Input and output locations.  The built-in defaults below can be overridden
## positionally on the command line:
##     script.pl [metadata.tsv [feature-table.tsv [combined-output.tsv]]]
my $metadata = defined($ARGV[0]) ? $ARGV[0] : '/home/thsti/metagenomicsAnalysis/16sRNA_Obese/sample_cleaned_trimmed_Qiime2Analysis/metadata_q17len100.tsv';
my $ftable = defined($ARGV[1]) ? $ARGV[1] : '/home/thsti/metagenomicsAnalysis/16sRNA_Obese/sample_cleaned_trimmed_Qiime2Analysis/P40obese_sample_cleaned_trimmed_rename_OTUfeature-table_q17len100.tsv';
my $newtable = defined($ARGV[2]) ? $ARGV[2] : '/home/thsti/metagenomicsAnalysis/16sRNA_Obese/sample_cleaned_trimmed_Qiime2Analysis/P40obese_sample_cleaned_trimmed_rename_OTUfeature-table_q17len100_combined1.tsv';
##
##
## Load the metadata first, so that a failure here cannot leave behind a
## freshly truncated (empty) output file.
my ($otuTax, $otuSeq) = MakeHash($metadata);

open(my $table_fh, '<', $ftable) or die "could not open the input file $ftable, $!";
open(my $out_fh, '>', $newtable) or die "could not open the output file $newtable, $!";

## Taxonomy string -> 'CST_<n>' label.  Kept separate from the
## featureID -> taxonomy lookup so the two key spaces can never collide.
my %cstOf;
my $otuNum = 1;

## Every emitted line goes both to STDOUT and to the output file.
while (my $ln = <$table_fh>)
{
    if ($ln =~ /^#/)
    {
        if ($ln =~ /^#OTU ID/)
        {
            ## Column header: prepend the CST column, append the two new ones.
            $ln =~ s/^\s+|\s+$//g;
            $ln = join("\t", '#CST ID', $ln, 'taxonomy', 'sequence') . "\n";
        }
        ## Any other comment line is passed through unchanged.
        print $ln;
        print {$out_fh} $ln;
        next;
    }

    ## /g is required: without it only the first alternative that matches is
    ## removed, so a line with leading blanks would keep its newline.
    $ln =~ s/^\s+|\s+$//g;
    next if $ln eq '';

    my ($ftureID) = split(/\t/, $ln, 2);

    unless (exists $otuTax->{$ftureID})
    {
        warn "feature $ftureID has no entry in $metadata; taxonomy and sequence left empty\n";
    }
    my $tax = defined($otuTax->{$ftureID}) ? $otuTax->{$ftureID} : '';
    my $seq = defined($otuSeq->{$ftureID}) ? $otuSeq->{$ftureID} : '';

    ## Features sharing a taxonomy string share one CST label; labels are
    ## numbered in order of first appearance in the feature table.
    unless (exists $cstOf{$tax})
    {
        $cstOf{$tax} = 'CST_' . $otuNum;
        $otuNum++;
    }

    my $row = join("\t", $cstOf{$tax}, $ln, $tax, $seq) . "\n";
    print $row;
    print {$out_fh} $row;
}
close($table_fh);
## Buffered write errors only surface at close time, so check it.
close($out_fh) or die "could not close the output file $newtable, $!";
##
## ****************************ParsingMetaData****************************************
## ParsingMetaData($id, @rows)
##   Linear search through tab-separated metadata rows for the first row whose
##   first column equals $id.
##   Each row is read as: column 0 = feature id, column 1 = sequence,
##   column 2 = taxonomy.
##   Returns the two-element list ($taxonomy, $sequence); both are undef when
##   no row matches.
sub ParsingMetaData
{
    my ($id, @m) = @_;
    my ($tx, $sq);
    foreach my $row (@m)
    {
        next unless defined $row;
        ## Trim a copy on BOTH ends (/g); without /g a row with leading
        ## blanks would keep its trailing newline in the last column.
        (my $clean = $row) =~ s/^\s+|\s+$//g;
        ## Blank rows have no id column to compare against.
        next if $clean eq '';
        my ($rowId, $rowSeq, $rowTax) = split(/\t/, $clean);
        if ($rowId eq $id)
        {
            $sq = $rowSeq;
            $tx = $rowTax;
            last;
        }
    }
    return($tx, $sq);
}
##
## ************************Hash table of Seq and Tax**********************************
## MakeHash($metadata)
##   Reads the tab-separated file at path $metadata and builds two lookups
##   keyed by the first column (feature id):
##     column 2 -> taxonomy hash, column 1 -> sequence hash.
##   The first two lines of the file are skipped (presumably the column
##   header and a type/annotation line -- confirm against the metadata file).
##   Blank lines are ignored.
##   Returns (\%taxhash, \%seqhash).  Dies if the file cannot be opened.
sub MakeHash
{
    my $metadata = shift;
    ## Lexical handle: every call starts with a fresh line counter ($.), and
    ## the handle is really closed before returning.
    open(my $meta_fh, '<', $metadata) or die "could not open the input file $metadata, $!";
    my (%taxhash, %seqhash);
    while (my $row = <$meta_fh>)
    {
        next if $. <= 2;
        ## Trim both ends (/g); without /g a line with leading blanks would
        ## keep its newline glued to the taxonomy column.
        $row =~ s/^\s+|\s+$//g;
        next if $row eq '';
        my ($id, $seq, $tax) = split(/\t/, $row);
        $taxhash{$id} = $tax;
        $seqhash{$id} = $seq;
    }
    ## The close must come before the return; after it, it is never executed.
    close($meta_fh) or die "could not close the input file $metadata, $!";
    return(\%taxhash, \%seqhash);
}