diff --git a/bin/scrape-arrl-fcc.R b/bin/scrape-arrl-fcc.R
index 9a6eaa1..27202d6 100644
--- a/bin/scrape-arrl-fcc.R
+++ b/bin/scrape-arrl-fcc.R
@@ -3,6 +3,9 @@ library(rvest)
 library(dplyr)
 library(tidyr)
 
+# Cron entry (install with `sudo crontab -e`): runs daily at 09:05 as user matt
+# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/ && Rscript bin/scrape-arrl-fcc.R" >/dev/null 2>&1
+
 arrl_url <- "https://www.arrl.org/fcc-license-counts"
 
 # Read the page
@@ -25,12 +28,16 @@ tab <- d_raw %>%
   bind_cols(Date = date_raw, .) %>%
   # Insert Tech Plus for compatibility
   mutate("Tech-Plus" = NA,
-         .before = General)
+         .before = General) %>%
+  mutate(a = NA, b = NA, c = NA, d = NA, e = NA, f = NA, g = NA,
+         source_name = "ARRL FCC License Counts",
+         source_detail = "http://www.arrl.org/fcc-license-counts")
 
 # Append table
 write.table(tab, file = "out/arrl-fcc-licenses-scraped.csv", sep = ",",
             append = TRUE, quote = FALSE,
-            col.names = TRUE, row.names = FALSE)
+            col.names = TRUE, row.names = FALSE,
+            na = "")
 
 # Clean up table to remove any duplicates (sometimes the page isn't updated regularly)
 db <- read.csv("out/arrl-fcc-licenses-scraped.csv")
@@ -38,5 +45,6 @@ db2 <- db %>%
   distinct(.keep_all = TRUE) %>%
   filter(Date != "Date")
 write.csv(db2, "out/arrl-fcc-licenses-scraped.csv", quote = F,
-          row.names = F)
+          row.names = FALSE,
+          na = "")
 