-
Notifications
You must be signed in to change notification settings - Fork 0
/
blog_split_per_account.R
40 lines (31 loc) · 1.11 KB
/
blog_split_per_account.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Split the cleaned blog data set into one CSV file per account:
# reads cleaned/all_blogs_cleaned.csv and writes cleaned/clean_<account>.csv
# Tina Keil, [email protected], April 2022
# Session-wide options.
# - java.parameters: blog data can be big, so the Java heap limit is raised
#   to 8000 MB (~8 GB). Adjust this to the RAM available on your machine!
# - scipen: a large penalty so numbers are not shown in scientific notation.
# NOTE(review): no Java-backed package is loaded in the visible part of this
# script -- confirm the heap setting is still needed.
options(
  java.parameters = "-Xmx8000m",
  scipen = 999
)
#load required libraries
library(dplyr)
library(data.table) #fread is much faster for reading csv
library(readr)
# Set the working directory to the directory of this script so that the
# relative paths used below resolve.
# - Inside RStudio the path of the active source document is used.
# - Outside RStudio (e.g. `Rscript blog_split_per_account.R`) the RStudio API
#   is not available, so the script path is taken from the `--file=`
#   command-line argument instead.
# - If neither is available, the current working directory is kept.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
  path <- dirname(rstudioapi::getSourceEditorContext()$path)
} else {
  script_arg <- grep(
    "^--file=",
    commandArgs(trailingOnly = FALSE),
    value = TRUE
  )
  if (length(script_arg) > 0) {
    script_file <- sub("^--file=", "", script_arg[1])
    path <- dirname(normalizePath(script_file, mustWork = FALSE))
  } else {
    path <- getwd()
  }
}
setwd(path)
############ END OF SETTINGS #############
# Input location. `dir` needs its trailing slash because it is pasted
# directly in front of file names (no separator is inserted).
dir <- "cleaned/" # folder containing the input file
file <- "all_blogs_cleaned.csv" # name of the input file
filepath <- paste0(dir, file) # relative path to the input file
############## process ##############
# Read the input CSV and write one CSV per distinct value of the `account`
# column to `<dir>clean_<account>.csv`.

# Abort early when the input file is missing.
if (!file.exists(filepath)) {
  stop("Can't find input file(s). Please check.")
}
data <- fread(filepath)

# Without an `account` column `unique(data$account)` would be empty and the
# loop below would silently write nothing, so fail loudly instead.
if (!"account" %in% names(data)) {
  stop("Input file has no 'account' column. Please check.")
}

# Rows with a missing account cannot be matched with `==` (the comparison
# yields NA). They would be dropped silently and an empty "clean_NA.csv"
# would be written, so report them and leave NA out of the loop.
n_missing <- sum(is.na(data$account))
if (n_missing > 0) {
  warning(
    n_missing, " row(s) without an account were skipped.",
    call. = FALSE
  )
}

accounts <- unique(data$account)
accounts <- accounts[!is.na(accounts)]

for (acc in accounts) {
  # which() keeps only definite matches, so rows with a missing account are
  # never selected regardless of the table class.
  adata <- data[which(data$account == acc), ]
  #save new csv per account
  write.csv(adata, paste0(dir, "clean_", acc, ".csv"), row.names = FALSE)
}