# ham-radio-licenses/bin/scrape-arrl-fcc.R
# 2024-04-21 16:14:02 -05:00
#
# 100 lines
# 3.6 KiB
# R

library(rvest)
library(dplyr)
library(tidyr)
# sudo crontab -e
# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-arrl-fcc.R">/dev/null 2>&1
arrl_url <- "https://www.arrl.org/fcc-license-counts"

# Read the page
d_raw <- read_html(arrl_url)

# Get date: take the text of the <em> node addressed by the absolute XPath,
# drop everything up to and including the last ", ", and parse what is left
# as day-abbreviated month-year. "%b" matches month abbreviations in the
# current LC_TIME locale, so parsing can yield NA under a non-English locale.
date_raw <- d_raw %>%
  html_nodes(xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/p[2]/em") %>%
  # Get text
  html_text() %>%
  gsub(".*, ", "", .) %>%
  as.Date(format = "%d-%b-%Y")

# Fail loudly instead of carrying a missing, unparseable, or ambiguous date
# into the rows that get appended to the CSV further down
if (length(date_raw) != 1L || is.na(date_raw)) {
  stop(
    "Could not parse a single date from the ARRL page (", arrl_url, ")",
    call. = FALSE
  )
}
# Get table and clean up: parse whatever the XPath matches into tables and
# keep the first one
arrl_tables <- html_table(
  html_elements(
    d_raw,
    xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/table"
  )
)

# Add date col as the leading column
arrl_counts <- bind_cols(Date = date_raw, arrl_tables[[1]])

tab <- arrl_counts %>%
  # Insert Tech Plus for compatibility (empty column placed before General)
  mutate(`Tech-Plus` = NA, .before = General) %>%
  # Empty placeholder columns a-g followed by the source attribution;
  # presumably these line up with the column layout of the output CSV --
  # TODO confirm
  mutate(
    a = NA,
    b = NA,
    c = NA,
    d = NA,
    e = NA,
    f = NA,
    g = NA,
    source_name = "ARRL FCC License Counts",
    source_detail = "http://www.arrl.org/fcc-license-counts"
  )
# Append table: add the scraped rows to the end of the existing CSV.
# No header row or row names are written, fields are not quoted, and NA
# values are written as empty fields.
write.table(
  tab,
  file = "out/arrl-fcc-licenses-scraped.csv",
  sep = ",",
  append = TRUE,
  quote = FALSE,
  # Use the reserved word FALSE; `F` is an ordinary variable that can be
  # reassigned
  col.names = FALSE,
  row.names = FALSE,
  na = ""
)
# Currently disabled: clean up the table to remove any duplicate rows (the page isn't always updated between runs, so the same counts can be appended more than once)
# db <- read.csv("out/arrl-fcc-licenses-scraped.csv")
# db2 <- db %>% distinct(.keep_all = TRUE) %>%
# filter(Date != "Date")
# write.csv(db2, "out/arrl-fcc-licenses-scraped.csv",
# quote = F,
# row.names = F,
# na = "")
###### HamCall
hamcall_url <- "https://hamcall.net/hamcallcounts.html"

# Read the page
hamcall_raw <- read_html(hamcall_url)

# Get date (weird for HamCall because no xpath): start from the full page
# text, remove everything through the "as of" lead-in, then remove everything
# from the following heading onward so only the date string remains
hamcall_page_text <- as.character(html_text(hamcall_raw))
hamcall_date_text <- gsub(
  ".*All counts current as of ",
  "",
  hamcall_page_text
)
hamcall_date_text <- gsub(
  "\n\r\n\r\nAll Current US Hams.*",
  "",
  hamcall_date_text
)
hamcall_date <- as.Date(hamcall_date_text)
# Get tables and clean up: parse every <table> on the page into a list, then
# pick out the ones of interest by their position in that list
hamcall_tables <- html_table(html_elements(hamcall_raw, xpath = "//table"))

hamcall_table_all_hams_raw <- hamcall_tables[[2]]
hamcall_table_class_raw    <- hamcall_tables[[3]]
hamcall_table_city_raw     <- hamcall_tables[[4]]
hamcall_table_state_raw    <- hamcall_tables[[5]]
# Total licenses and class counts
# Values read from column 2 of the all-hams table: row 1 is used as the
# total, row 2 as the club count, and row 3 as the military count
hamcall_total_count <- pull(hamcall_table_all_hams_raw[1, 2])
hamcall_club_count <- pull(hamcall_table_all_hams_raw[2, 2])
hamcall_military_count <- pull(hamcall_table_all_hams_raw[3, 2])

# Spread the class table so each value of Class becomes its own column
hamcall_class_wide <- pivot_wider(
  hamcall_table_class_raw,
  names_from = "Class",
  values_from = "Count"
)

hamcall_table_class_pivot <- hamcall_class_wide %>%
  # Grab total and date
  mutate(
    date = hamcall_date,
    state = "TOTAL",
    techplus = NA,
    total = hamcall_total_count
  ) %>%
  # Arrange columns: move the nine wanted columns to the front, keep only them
  relocate(date, state, N, T, techplus, G, A, E, total) %>%
  select(1:9) %>%
  # Trailing columns: empty placeholders around club/military, then the source
  mutate(
    a = NA,
    b = NA,
    club = hamcall_club_count,
    military = hamcall_military_count,
    c = NA,
    d = NA,
    e = NA,
    source_name = "HamCall",
    source_detail = hamcall_url
  )
# Append table: add the HamCall row to the end of the existing CSV.
# No header row or row names are written, fields are not quoted, and NA
# values are written as empty fields.
write.table(
  hamcall_table_class_pivot,
  file = "out/hamcall-licenses-scraped.csv",
  sep = ",",
  append = TRUE,
  quote = FALSE,
  # Use the reserved word FALSE; `F` is an ordinary variable that can be
  # reassigned
  col.names = FALSE,
  row.names = FALSE,
  na = ""
)