-
Notifications
You must be signed in to change notification settings - Fork 1
/
aggregate_measures.do
141 lines (105 loc) · 4.84 KB
/
aggregate_measures.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
* Requires: a dataset in memory with
*   - a group identifier on each observation (e.g., gvkey, MSA, state)
*   - a time variable (e.g., month, quarter, or year of application or grant)
*   - the X variables you want to aggregate to a group-time panel
cap prog drop patstat_agg
prog def patstat_agg
    syntax varlist, Groupvar(varname) Timevar(varname) Depreciation(real) Windowsize(integer) [Idvar(varname)]
    /*
    Purpose
    -------
    Replaces the observation-level data in memory (each observation belongs to
    a group and a time period and carries some statistics X) with a balanced
    group-time panel. For every X in varlist the panel contains

        X_max_roll<W>   max(X) for the group over the window [t-(W-1), t]
        X_avg_roll      avg(X) for the group over the window [t-(W-1), t]
        X_stock         sum(X*(1-deprec)^lag) / count(X) over the same window

    where W is windowsize(). The stock is the average X, weighted towards more
    recent periods. It is intuitively also a stock. This is the method of
    creating patent stocks in Bowen, Fresard, and Hoberg (forthcoming).

    Options
    -------
    groupvar()      panel identifier; it is passed to tsset, which needs a
                    numeric variable
    timevar()       time variable (any regularly spaced integer period)
    depreciation()  depreciation rate *per period*. If 20% a year on annual
                    data, use 0.20. If 20% a year on quarterly data, use 0.05.
    windowsize()    number of periods in the window, must be >= 1.
                    windowsize(5) means stats cover [t-4,t].
    idvar()         accepted for backward compatibility only. It is not used:
                    the count in the denominators is the number of nonmissing
                    values of each X, so no observation identifier is needed.

    Notes
    -----
    1. The resulting dataset has all possible combinations of group-time
       (tsfill, full), so it will likely have excess observations.
    2. Observations with missing values for group or time are dropped.
    3. When a window contains no nonmissing X, the rolling max and the stock
       are set to 0 and the rolling average is missing (0/0).
    4. Requires the user-written command rangestat (ssc install rangestat).
    5. The data in memory are destroyed; preserve first if you need them.

    Example
    -------
    // it replicates the stocking function in the paper:
    use if retech != . using pat_lv, clear
    keep if ayear > 1970 & ayear <= 2010 // keep some burn in
    patstat_agg retech , g(vxfirm_id) t(aqtr) d(0.05) w(20)
    drop if year(dofq(aqtr)) < 1980 // drop burn in period
    rename (aqtr retech_stock) (qtr retech_stock_fcn)
    merge 1:1 vxfirm qtr using startup_qtr_panel, keepusing(retech_stock) keep(3)
    pwcorr retech_stock retech_stock_fcn

    // the function is more general. here, used on industry-year variables:
    use "http://www.stata-press.com/data/r10/abdata.dta", clear
    patstat_agg emp wage indoutpt , g(ind) t(year) d(0.20) w(4)
    */

    // ---- input validation (outside the quiet block so messages are shown) ----
    if `windowsize' < 1 {
        di as error "windowsize() must be an integer >= 1"
        exit 198
    }
    capture which rangestat
    if _rc {
        di as error "patstat_agg requires rangestat; install it with: ssc install rangestat"
        exit 199
    }

    qui {

        drop if missing(`groupvar') | missing(`timevar')

        // set up the target lists for collapse and for rangestat
        local max_collapse
        local sum_collapse
        local count_collapse
        local max_range
        local sum_range
        local count_range
        foreach v of local varlist {
            local max_collapse   "`max_collapse' max_`v' = `v'"
            local sum_collapse   "`sum_collapse' sum_`v' = `v'"
            local count_collapse "`count_collapse' count_`v' = `v'"
            local max_range      "`max_range' `v'_max_roll`windowsize' = max_`v'"
            local sum_range      "`sum_range' `v'_sum_roll`windowsize' = sum_`v'"
            local count_range    "`count_range' `v'_count_roll`windowsize' = count_`v'"
        }

        // collapse to a group-time panel holding each cell's sum, max and
        // nonmissing count of every X
        collapse (sum) `sum_collapse' (max) `max_collapse' ///
            (count) `count_collapse', by(`groupvar' `timevar')

        // balance the panel; empty cells contribute 0 to sums and counts.
        // The per-period max is deliberately left missing in empty cells so
        // that a filled-in 0 cannot beat genuinely negative values in the
        // rolling max below (rangestat skips missing values).
        tsset `groupvar' `timevar'
        tsfill, full
        foreach v of local varlist {
            replace sum_`v'   = 0 if missing(sum_`v')
            replace count_`v' = 0 if missing(count_`v')
        }

        // rolling max, sum and count over [t-maxlag, t]
        local maxlag   = `windowsize' - 1   // ex: lags 0,1,2,3 for win length = 4
        local lookback = -`maxlag'          // ex: interval [-3,0] for win length = 4
        rangestat (max) `max_range' ///
            (sum) `sum_range' `count_range' ///
            , interval(`timevar' `lookback' 0) by(`groupvar')

        foreach v of local varlist {
            // windows with no observations report a max of 0
            replace `v'_max_roll`windowsize' = 0 if `v'_count_roll`windowsize' == 0
            g `v'_avg_roll = `v'_sum_roll`windowsize' / `v'_count_roll`windowsize'
            lab var `v'_max_roll`windowsize' "`v' max: [t-`maxlag',t]"
            lab var `v'_avg_roll "`v' avg: [t-`maxlag',t]"
        }

        // create the rolling window "stocks" (a rolling average weighted
        // towards the present)
        //
        //          sum over obs in window ( (1-d)^lag * X )
        // stock =  ----------------------------------------
        //               count of obs in that window
        local perc = `depreciation'*100
        sort `groupvar' `timevar'
        foreach v of local varlist {
            g `v'_stock = 0
            forval lag = 0/`maxlag' {
                // _n > lag skips the first periods of each group, where the
                // lagged value does not exist (the panel is balanced, so the
                // lag is available everywhere else)
                by `groupvar' (`timevar'): replace `v'_stock = `v'_stock + L`lag'.sum_`v' * (1-`depreciation')^`lag' if _n > `lag'
            }
            replace `v'_stock = `v'_stock / `v'_count_roll`windowsize'
            replace `v'_stock = 0 if `v'_count_roll`windowsize' == 0
            lab var `v'_stock "`v' stock: [t-`maxlag',t] (`perc'% deprec) "
        }

        // output
        order `groupvar' `timevar'
        keep `groupvar' `timevar' *_max_roll* *_avg_roll* *_stock*

    }
end