#This script should be run on 1 node for 8 hours on the cluster.
#The organization script loads and filters the AR data into 12 large data frames (one for each month)
#with the frequency of AR hits by location.

library(dplyr)
library(lubridate)

#setwd("Y:/hwalcek/ar_2010_all")
setwd("/depot/wwtung/data/hwalcek/ar_2010_all")

#combine all csv files - files are named ar_<number>.csv, so a loop is necessary
#(a faster single-pass alternative is sketched at the end of this script)
filename <- paste("ar_", 1, ".csv", sep = "")
ar_all <- read.csv(filename) #load in the first csv file
for (Number in 2:2920){
  filename <- paste("ar_", Number, ".csv", sep = "") #build the file name
  ar_all_data <- read.csv(filename)                  #read the csv file
  ar_all <- rbind(ar_all, ar_all_data)               #combine with the previously read files
}

#save(ar_all, file = "Y:/hwalcek/ar_2010_monthly/ar_all.Rdata")
save(ar_all, file = "/depot/wwtung/data/hwalcek/ar_2010_monthly/ar_all.Rdata")

#reload the combined data so the rest of the script can also be run on its own
#load("Y:/hwalcek/ar_2010_monthly/ar_all.Rdata")
load("/depot/wwtung/data/hwalcek/ar_2010_monthly/ar_all.Rdata")

#add separate year, month, day columns
ar_all_sep <- ar_all %>%
  dplyr::mutate(year = lubridate::year(Time),
                month = lubridate::month(Time),
                day = lubridate::day(Time))

#divide the data frame by month (group_split returns a list of 12 data frames, in month order)
ar_monthly <- group_split(ar_all_sep, month)

#Frequency of AR hits by location each month:
#setwd("Y:/hwalcek/ar_2010_monthly")
setwd("/depot/wwtung/data/hwalcek/ar_2010_monthly")

#add AR counts together to create the frequency for each month
for (Number in 1:12){
  #transmute keeps one row per input record, with the per-location total in count;
  #use summarise() instead for one row per (Lon, Lat)
  month_counts <- ar_monthly[[Number]] %>%
    group_by(Lon, Lat) %>%
    transmute(count = sum(AR))
  filename <- paste("ar_month_", Number, ".rds", sep = "") #create the file name
  saveRDS(month_counts, file = filename)                   #save each month to an rds file
}
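
#Optional: the rbind() call in the loop above re-copies the growing data frame on
#every iteration, which gets slow over ~2920 files. A minimal alternative sketch,
#assuming the same ar_<number>.csv file layout, reads every file into a list and
#binds them once; it is wrapped in if (FALSE) so it does not run by default.
if (FALSE) {
  files <- paste("ar_", 1:2920, ".csv", sep = "")      #same file names as the loop above
  ar_all <- dplyr::bind_rows(lapply(files, read.csv))  #read each csv, then combine once
}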