-
Notifications
You must be signed in to change notification settings - Fork 67
/
Copy pathsubmissions-by-year-month.sql
55 lines (47 loc) · 1.51 KB
/
submissions-by-year-month.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
-- Summary stats by year and month
-- For story-type submissions only. Change to `t.type = 'comment'` in `where` to get stats for comments.
select
# group
year(sec_to_timestamp(t.time)) year,
month(sec_to_timestamp(t.time)) month,
count(*) n,
# stats: authors
avg(t.time - st.first_time) avg_tenure,
avg((t.time - st.first_time) < 31536000) pct_under_year,
avg(if(st.first_time > 0, 0, 1)) pct_first_submission,
count(unique(t.by)) n_unique_authors,
count(unique(t.by))/count(*) diversity_index,
# stats: submissions
avg(length(t.title)) length_title,
avg(if(t.deleted = true, 1, 0)) pct_submissions_deleted,
avg(if(t.dead = true, 1, 0)) pct_submissions_dead,
sum(t.score) sum_score,
avg(t.score) avg_score,
stddev(t.score) stddev_score,
min(t.score) min_score,
nth(11, quantiles(t.score, 21)) median_score,
max(t.score) max_score,
sum(t.descendants) sum_descendants,
avg(t.descendants) avg_descendants,
stddev(t.descendants) stddev_descendants,
min(t.descendants) min_descendants,
nth(11, quantiles(t.descendants, 21)) median_descendants,
max(t.descendants) max_descendants
from
[fh-bigquery:hackernews.full_201510] t
left join (
select
[by],
count(*) n_submissions,
min(time) first_time,
max(time) last_time,
avg(if(deleted = true, 1, 0)) pct_author_submissions_deleted,
from [fh-bigquery:hackernews.full_201510]
group by [by]
) st on st.by = t.by
where
t.type in ('story', 'job', 'poll')
group by
year, month
order by
year, month