-
Notifications
You must be signed in to change notification settings - Fork 2
/
rate_bucket_analysis_23rd_july.do
249 lines (225 loc) · 9.52 KB
/
rate_bucket_analysis_23rd_july.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
* Change working directory
set more off
cd "E:\GSTN Data\Raw\latest-02-07-2019\intermediate_files" //Start from the intermediate files folder//
/*
* Reading in and saving comprehensive rates file
cd ..
cd "raw_files"
import excel "rate_file_master_all_rates.xlsx", sheet("Rates (2)") cellrange(A3:AA1390) firstrow clear
cd ..
cd "intermediate_files"
drop Z AA
* Cleaning up comprehensive rates file
forvalues x = 1/23 {
replace m`x'="0" if hsn_sc=="39264011" //Replace Nil by 0//
replace m`x'="0" if hsn_sc=="050790" //Replace Nil by 0//
replace m`x'="0" if hsn_sc=="19041020" //Replace Nil by 0//
replace m`x'="3" if hsn_sc=="7101" //Correct error in this entry showing 403 percent//
}
forvalues x=1/2 {
replace m`x'="0" if hsn_sc=="24868550" //Replace - by 0//
}
forvalues x = 1/23 { //Destring rate variables//
destring m`x', replace
}
rename month desc //checking rates distribution to check that there are no outliers
forvalues x=1/23{
tab m`x'
}
replace hsn_sc = subinstr(hsn_sc, " ", "",.) //Removing spaces between HSN's//
strip hsn_sc, of(" ") gen(hsn)
drop hsn_sc
rename hsn hsn_sc
replace hsn_sc="87081010" if hsn_sc=="24868550" //This entry does not exist//
drop if hsn_sc=="-"
rename desc ch_desc
gen hsn_4 = substr(hsn_sc,1,4)
order hsn_4 hsn_sc
sort hsn_4 hsn_sc
save "rates_file_all_rates.dta", replace
export excel using "rates.xlsx", replace // This is the rates file given to Shekhar Sir for assigning weights//
*/
* Reading in weighted rates file prepared by Shekhar sir and saving
cd ..
cd "raw_files"
import excel "rates_assigning_weights_new.xlsx", sheet("Sheet4") firstrow clear
cd ..
cd "intermediate_files"
save "rates_assigning_weights_new.dta", replace
* Basic cleaning/checks on weighted rates file
* Generating temporary file containing split number of each HSN
use "rates_assigning_weights_new.dta", clear
replace weights = weights*100
bysort hsn_4: egen checksum = total(weights) //Sum of all weights should be 100//
tab checksum //There should be no other value except for 100 in the table//
gen dummy = 1
gen digit = strlen(hsn_4) //HSN_4 is at either 2 digit or 4 digit//
bysort hsn_4: egen splits = total(dummy) //Count number of repetitions for each HSN//
keep hsn_4 splits digit
duplicates drop hsn_4, force //Keep unique HSN_4 and their count for merge//
preserve
keep if digit == 2
drop digit
rename hsn_4 hsn_2
save "split_tempfile_2_digit.dta", replace
restore
preserve
keep if digit == 4
drop digit
save "split_tempfile_4_digit.dta", replace
restore
* Opening HSN wise GSTR-1 data file containing collections
use "complete_gstr1_hsn_wise_with_cash_ratio.dta", clear
drop if cash_ratio_hsn == .
gen collections_cr = (liab_cr * cash_ratio_hsn)/100 //We won't be using actul collections for this analysis//
table fy, c(sum collections_cr)
merge m:1 month using "month_names.dta"
drop _merge
drop event
* Cleaning up of HSN wise GSTR-1 data file containing collections
gen chapter_raw = substr(hsn_sc,1,2) //Pick leftmost 2 digits from HSN//
gen chapter_new = chapter_raw
destring chapter_new, gen(chapter_no) force //Will drop all alphabets etc//
replace chapter_new = "00" if chapter_no==. //Put all such alphabet etc missing values into error chapter//
replace chapter_new = "00" if chapter_new=="0" //Putting single digit HSN into Chapters 1 to 9//
replace chapter_new = "01" if chapter_new=="1"
replace chapter_new = "02" if chapter_new=="2"
replace chapter_new = "03" if chapter_new=="3"
replace chapter_new = "04" if chapter_new=="4"
replace chapter_new = "05" if chapter_new=="5"
replace chapter_new = "06" if chapter_new=="6"
replace chapter_new = "07" if chapter_new=="7"
replace chapter_new = "08" if chapter_new=="8"
replace chapter_new = "09" if chapter_new=="9"
replace chapter_new = "00" if chapter_new=="77" //77= unused chapter//
drop chapter_no
table chapter_new, c(sum collections_cr)
replace cash_ratio_hsn = int(cash_ratio_hsn)
drop if collections_cr < 0 //Drop if negative collections shown//
table fy, c(sum collections_cr)
save "rates_analysis_master_file_all_rates.dta", replace
* Starting rates analysis- splitting GSTR-1 data set
* First create monthly files
forvalues x = 1/23 {
use "rates_analysis_master_file_all_rates.dta", clear
keep if month == `x'
save "rates_analysis_master_file_all_rates_m`x'.dta", replace
}
* Splitting at 2 digit level for each month
forvalues x = 1/23 {
use "rates_analysis_master_file_all_rates_m`x'.dta", clear
rename chapter_new hsn_2
merge m:1 hsn_2 using "split_tempfile_2_digit.dta" //Obtain count of repeititons for each hsn_2//
keep if _merge==3
drop _merge
drop cess_cr collections_cr chapter_raw desc //we will compute our own collections//
bysort hsn_2: asgen cash_ratio_new = cash_ratio_hsn, weights(liab_cr) //Calculate Cash credit ratio for HSN at 2 digit in each month//
collapse (sum) value_cr (mean) cash_ratio_new month fy splits, by(hsn_2)
expand splits //Weights will automatically take care of value_cr repeating//
bysort hsn_2: gen split_no = _n
rename cash_ratio_new cash_ratio_hsn
rename hsn_2 hsn
save "rates_analysis_master_file_all_rates_2_digit_collapsed_m`x'.dta", replace
}
* Splitting at 4-digit level
forvalues x = 1/23 {
use "rates_analysis_master_file_all_rates_m`x'.dta", clear
drop desc chapter_raw chapter_new
gen hsn_4 = substr(hsn_sc,1,4)
merge m:1 hsn_4 using "split_tempfile_4_digit.dta"
keep if _merge==3
drop _merge
drop cess_cr collections_cr
bysort hsn_4: asgen cash_ratio_new = cash_ratio_hsn, weights(liab_cr)
collapse (sum) value_cr (mean) cash_ratio_new month fy splits, by(hsn_4)
expand splits
bysort hsn_4: gen split_no = _n
rename cash_ratio_new cash_ratio_hsn
rename hsn_4 hsn
save "rates_analysis_master_file_all_rates_4_digit_collapsed_m`x'.dta", replace
}
* Appending all the split data of 2 digit and 4 digit level of different months
use "rates_analysis_master_file_all_rates_2_digit_collapsed_m1.dta"
append using "rates_analysis_master_file_all_rates_4_digit_collapsed_m1.dta"
forvalues x=2/23{
append using "rates_analysis_master_file_all_rates_2_digit_collapsed_m`x'.dta"
append using "rates_analysis_master_file_all_rates_4_digit_collapsed_m`x'.dta"
}
save "rates_analysis_master_file_all_rates_split_complete.dta", replace
* Merging in weights and rates
use "rates_assigning_weights_new.dta", clear
bysort hsn_4: egen checksum = total(weights) //Sum of all weights should be 1//
tab checksum //verified//
drop hsn_sc checksum
rename hsn_4 hsn
bysort hsn: gen split_no = _n
egen key = concat (hsn split_no), punct("_")
order key
sort key
reshape long m, i(key) j(month)
rename m rate
drop key
save "rates_assigning_weights_tempfile.dta", replace
* Merging
use "rates_analysis_master_file_all_rates_split_complete.dta", clear
drop splits
merge m:1 hsn split_no month using "rates_assigning_weights_tempfile.dta"
drop _merge
sort hsn month split_no
replace value_cr = value_cr * weights
gen liab_cr = (value_cr * rate)/100
gen collections_cr = (liab_cr * cash_ratio_hsn)/100
gen event = .
replace event = 1 if (month >=1 & month <=4)
replace event =2 if (month >=6 & month<=13)
replace event =3 if (month>=14 & month <=18)
replace event = 4 if (month >=19 & month <=21)
drop if event==. //Drop months that do not fall in any event//
* Generating tables
* Overall aggregates
table fy, c(sum value_cr sum liab_cr sum collections_cr)
* Aggregate- Collections
table fy rate, c(sum collections_cr)
table event rate, c(sum collections_cr)
table month rate, c(sum collections_cr)
* Aggregate Taxable Value
table fy rate, c(sum value_cr)
table event rate, c(sum value_cr)
table month rate, c(sum value_cr)
* Aggregate Liability
table fy rate, c(sum liab_cr)
table event rate, c(sum liab_cr)
table month rate, c(sum liab_cr)
* Delete unnecessary files
erase "split_tempfile_2_digit.dta"
erase "split_tempfile_4_digit.dta"
forvalues x = 1/23 {
erase "rates_analysis_master_file_all_rates_m`x'.dta"
erase "rates_analysis_master_file_all_rates_2_digit_collapsed_m`x'.dta"
erase "rates_analysis_master_file_all_rates_4_digit_collapsed_m`x'.dta"
}
* Plot graphs
gen event_desc = ""
replace event_desc = "Jul-17 to Oct-17" if event==1
replace event_desc = "Dec-17 to Jul-18" if event==2
replace event_desc = "Aug-18 to Dec-18 " if event==3
replace event_desc = "Jan-19 to Mar-19" if event==4
labmask event, values(event_desc)
collapse (sum) value_cr liab_cr collections_cr, by (rate event)
bysort event: egen collections_event = sum(collections_cr)
gen percent_collections_event = 100*(collections_cr/collections_event)
bysort event: egen value_event = sum(value_cr)
gen percent_value_event = 100*(value_cr/value_event)
bysort event: egen liab_event = sum(liab_cr)
gen percent_liab_event = 100*(liab_cr/liab_event)
cd ..
cd "output_files"
* Collections
graph bar (asis) percent_collections_event, over(event, lab(angle(vertical) labsize(tiny))) over(rate) blabel(bar, format(%10.1fc) size(tiny)) ytitle("Share in percent") title("Event-Wise Composition of Goods in Rate Buckets", size(medium)) subtitle("Collections")
graph export "rate_bucket_collections.png", width(10000) replace
* Taxable Value
graph bar (asis) percent_value_event, over(event, lab(angle(vertical) labsize(tiny))) over(rate) blabel(bar, format(%10.1fc) size(tiny)) ytitle("Share in percent") title("Event-Wise Composition of Goods in Rate Buckets", size(medium)) subtitle("Taxable Value")
graph export "rate_bucket_value.png", width(10000) replace
* Liability
graph bar (asis) percent_liab_event, over(event, lab(angle(vertical) labsize(tiny))) over(rate) blabel(bar, format(%10.1fc) size(tiny)) ytitle("Share in percent") title("Event-Wise Composition of Goods in Rate Buckets", size(medium)) subtitle("Tax Liability")
graph export "rate_bucket_liability.png", width(10000) replace