-
Notifications
You must be signed in to change notification settings - Fork 0
/
scaling-output.do
190 lines (140 loc) · 5.44 KB
/
scaling-output.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
/*******************************************************************************
Create output from scale transformations

Outline of this do-file, in order:
  1. load the saved scale-transformation results and reshape them to one wide row
  2. merge them onto the wide test-score file and compute the rescaled scores
  3. reshape to child-year level and standardize each rescaled score
  4. regress each score on reportcard (year 2 only) and collect the results
  5. plot the coefficients with their confidence intervals and export a PNG
*******************************************************************************/
////// Set up //////
* Load the file paths
* (runs _filepaths.do from the folder held in global gituser; presumably it
*  defines the path globals used by the project - TODO confirm)
do "$gituser/_filepaths.do"
* Ado path
* (adopath ++ puts the project ado folder at the front of the ado search path)
adopath ++ "$gituser/ado"
////// Clean the output //////
* Read in the results saved by the scale-transformation step
use "$gituser/scaling/iterations100-y4-max-10obs.dta", clear

* Discard rows with a (system) missing objective value; per the original
* author the rows that remain are the order-preserving transformations
drop if obj == .

* Number the remaining rows: this row number is the iteration id used below
generate i = _n

* Publish the list of iteration ids as global iterations
levelsof i, local(iter_ids)
global iterations `iter_ids'

* This output is for maximizing gap : browse the gap results
summarize obj, detail

* Find the iteration whose objective equals the overall maximum and publish
* its id as global maxiteration. The maximum is computed in double precision
* because, per the original note, that is the storage type of obj.
tempvar obj_max is_max
egen double `obj_max' = max(obj)
generate `is_max' = (`obj_max' == obj)
levelsof i if `is_max' == 1, local(best_id)
global maxiteration `best_id'
drop `obj_max' `is_max'

* Put all transformations on a single wide row keyed by the constant type,
* so that they can be merged onto every test-score record
local params obj b1 b2 b3 b4 b5 b6 c
keep `params' i
generate type = "max"
foreach p of local params {
    rename `p' `p'_
}
* NOTE(review): the stubs below are passed without the trailing underscore
* added just above; this looks like it relies on variable-name abbreviation
* to produce names such as b1_1 - confirm
reshape wide `params', i(type) j(i)

* Keep the wide row in a tempfile for the merge with the test scores
tempfile scalingoutput
save `scalingoutput'
////// Add the outputs to the test scores //////
* Open the wide test-score file
use "$gituser/2_build/testcores_y1y2_wide.dta", clear

* Attach the one-row coefficient file to every record through the constant
* key type; assert(3) stops the run unless every record matches
generate type = "max"
merge m:1 type using `scalingoutput', assert(3) nogenerate

* Compute the rescaled score for every iteration and year: a polynomial of
* degree 6 (no constant term) in the deviation of the original score from c
foreach k of global iterations {
    forvalues yr = 1/2 {
        local dev (total_mle`yr' - c_`k')
        generate total_mle`yr'_`k' = b1_`k' * `dev' + ///
            b2_`k' * `dev'^2 + b3_`k' * `dev'^3 + ///
            b4_`k' * `dev'^4 + b5_`k' * `dev'^5 + ///
            b6_`k' * `dev'^6
    }
}

** TODO: understand why some of the new test scores are missing???
* Hard-coded ids of the iterations to leave out; global rescaled holds the
* remaining iteration ids and is what the regression loop iterates over
global missing "78 79 86"
global rescaled : list global(iterations) - global(missing)
display "$rescaled"

* The coefficients are no longer needed once the scores are computed
drop obj_* b*_* c_*
* Standardize the new test scores (periods 1 and 2 combined)

*** step 1 : build the list of reshape stubs, one per iteration; the @ marks
***          where the year digit sits inside each variable name
local stubs
foreach k of global iterations {
    local stubs `stubs' total_mle@_`k'
}
global vartoreshape `stubs'

*** step 2 : go from one row per child to one row per child and year
reshape long total_mle@ $vartoreshape, i(childuniqueid) j(year)

*** step 3 : standardize each rescaled score over all rows (both years pooled)
foreach k of global iterations {
    egen total_mle_`k'_std = std(total_mle_`k')
}
* Calculate and plot all the regressions

* Start from an empty set of stored estimates
eststo clear

* Reference regression on the original score, stored as eq0
eststo eq0: regress total_mle reportcard if year==2

* One regression per rescaled, standardized score, stored as eq followed by
* the iteration id
foreach k of global rescaled {
    eststo eq`k': regress total_mle_`k'_std reportcard if year==2
}

* Tabulate every stored model and keep the returned matrices:
* A holds the coefficient cells, B holds the model statistics (N and r2)
estout _all, cells("b(fmt(%9.3f)) se(fmt(%9.3f)) p(fmt(%9.3f)) ci_l(fmt(%9.3f)) ci_u(fmt(%9.3f))") stats(N r2)
matrix A = r(coefs)
matrix B = r(stats)
* Store the regression stats (N and r2)
* The test-score data are set aside while matrix B is turned into a small
* dataset with one row per regression, then brought back by restore.
preserve
    clear

    * One variable per column of B, named from equation and column names
    * (the reshape below expects these names to start with _eq)
    svmat B, names(eqcol)

    * Label each row with the matching row name of B (the statistic name)
    local statlabels : rownames B
    local nstats : word count `statlabels'
    generate stat = ""
    forvalues r = 1/`nstats' {
        local label : word `r' of `statlabels'
        replace stat = `"`label'"' in `r'
    }

    * Pivot: first everything onto one row, then back to long with one row
    * per regression and the two statistics as columns
    generate i = "stat"
    reshape wide _eq*, j(stat) i(i) string
    reshape long _eq@N _eq@r2, i(i) j(iteration)
    drop i

    * Strip the _eq prefix from the variable names
    foreach v of varlist _all {
        local stripped = subinstr("`v'", "_eq", "", .)
        rename `v' `stripped'
    }

    * Keep the result for the merge with the coefficients
    tempfile stats
    save `stats'
restore
* Store the regression coefficients
* The data in memory are preserved before being cleared so that the restore
* closing this section has something to bring back. With preserve commented
* out, that restore stops with a nothing-to-restore error, and the test-score
* data have already been wiped by clear.
preserve
clear
* One variable per column of A, named from equation and column names
* (the reshape below expects names such as eq0b, eq0se, ...)
svmat A, names(eqcol)
* Label each row with the matching row name of A (the regressor name)
local coefnames : rownames A
gen var=""
forvalues i=1/`: word count `coefnames'' {
    replace var=`"`: word `i' of `coefnames''"' in `i'
}
* One row per regressor and regression, with b, se, p and CI bounds as columns
reshape long eq@b eq@se eq@p eq@ci_l eq@ci_u, i(var) j(iteration)
* Add the regression stats to them
merge m:1 iteration using `stats', assert(3) nogen
* Strip the eq prefix from the reshaped variable names
foreach var in eqb eqse eqp eqci_l eqci_u {
    local varname = "`var'"
    local newname = subinstr("`varname'", "eq", "", .)
    rename `var' `newname'
}
* Only keep the relevant coefficients for plotting
local keep reportcard
keep if strpos("`keep'", var) > 0
* Gen the iteration id for x axis (regressions ordered by coefficient size)
sort b
gen x = _n
* Store the number of rescaled scores
local neqs = wordcount("$rescaled")
dis "`neqs'"
* Plot the regression coefficients and their CI
* NOTE(review): the comparisons with global maxiteration below assume that it
* holds a single iteration id - confirm there are no ties at the maximum
twoway (scatter b x if iteration==0, mcolor(gold)) (rcap ci_l ci_u x if iteration==0, lcolor(gold)) /// //reference reg
(scatter b x if iteration==$maxiteration, mcolor(cranberry)) (rcap ci_l ci_u x if iteration==$maxiteration, lcolor(cranberry)) /// //maximizing gap growth reg
(scatter b x if iteration>0 & iteration!=$maxiteration, mcolor(navy)) (rcap ci_l ci_u x if iteration>0 & iteration!=$maxiteration, lcolor(navy)) /// //simulated regs
, by(var, legend(off) title("Regression Coefficients On Re-Scaled Second Year Test Scores") subtitle("With `neqs' rescaled second year test scores") note("Yellow = original test score. Red = rescaled test scores maximizing the y1-y2 gap growth between T and C." "Blue = random rank-preserving rescaled test scores.")) ///
ytitle("Regression Coefficient") xtitle("Regression in Y2") yline(0)
* NOTE(review): there is no replace option here, so the export stops with an
* error when the PNG already exists - confirm whether that is intended
graph export "$gituser/img/rescaled.png", as(png) name("Graph")
* Bring the test-score data back into memory
restore