Scrape AE7Q states.

This commit is contained in:
mattbk 2024-09-20 20:36:57 -05:00
parent 8f86f15abe
commit 6177bd0b6d

View File

@ -6,6 +6,7 @@ library(tidyr)
# Scheduling: add the entry below to root's crontab, edited with
# sudo crontab -e
# The entry runs this script as user matt at 09:05 every day and discards all output:
# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-license-counts.R">/dev/null 2>&1
###### ARRL ######
# URL of the ARRL page that is scraped in this section
arrl_url <- "https://www.arrl.org/fcc-license-counts"
# Read the page
@ -137,20 +138,32 @@ ae7q_raw <- read_html(ae7q_url)
# Select every <table> element of the parsed AE7Q page and convert each one
# to a data frame; the result is a list with one entry per table.
ae7q_tables <- html_table(html_elements(ae7q_raw, xpath = "//table"))
# The per-state table is picked by position: element 20 of ae7q_tables.
# NOTE(review): a positional index depends on the page layout staying the same;
# confirm it still points at the state table if the scrape starts failing.
# NOTE(review): this listing shows the same steps for both ae7q_table_state and
# ae7q_table_state_raw; presumably the former are pre-change lines — confirm
# against the committed file.
ae7q_table_state <- ae7q_tables[[20]]
ae7q_table_state_raw <- ae7q_tables[[20]]
# Fix names: the first row is used as the column names and then dropped
names(ae7q_table_state) <- ae7q_table_state[1,]
ae7q_table_state <- ae7q_table_state[-1,]
# TODO
# split percents out into other columns (separate_wider_delim() ?)
# etc.
# Same header fix-up for the raw table: row 1 becomes the column names, then is removed
names(ae7q_table_state_raw) <- ae7q_table_state_raw[1,]
ae7q_table_state_raw <- ae7q_table_state_raw[-1,]
# Tidy the AE7Q per-state table: strip the parenthesised percentages, split the
# state label into code and name, add the placeholder/metadata columns and put
# the leading columns in a fixed order.
ae7q_table_state <- ae7q_table_state_raw %>%
  # Remove percentages, i.e. any "(...)" group and the whitespace before it,
  # from every column except the state label. This is done column-wise instead
  # of through a pivot_longer()/pivot_wider() round trip, which errors when
  # column types differ and yields list-columns when a row label repeats.
  mutate(across(-"State or Territory",
                function(x) gsub("\\s*\\([^\\)]+\\)", "", x))) %>%
  # Split states: "<code> - <name>". Labels without " - " keep their whole
  # text in state_code and get NA in state_name (fill = "right").
  separate(`State or Territory`,
           into = c("state_code", "state_name"),
           sep = " - ",
           fill = "right") %>%
  # Give the "-" and "Totals" rows an explicit name; all other rows keep theirs
  mutate(state_name = case_when(state_code == "-" ~ "Other*",
                                state_code == "Totals" ~ "TOTAL",
                                TRUE ~ state_name)) %>%
  # Organize: drop the columns that are not written out
  select(c(-GeoRegion, -state_code)) %>%
  # Stamp the scrape date and source, and add empty placeholder columns
  mutate(date = Sys.Date(),
         ttp = NA, conditional = NA, military = NA, multiple = NA,
         repeater = NA, gmrs = NA,
         source = "AE7Q", source_detail = ae7q_url) %>%
  # Move these columns to the front in this order; the rest keep their order
  relocate(date, state_name, Novice, Technician, TechnicianPlus,
           General, Advanced, AmateurExtra, Total, ttp, conditional,
           Club)
##### Append tables #####
write.table(hamcall_table_class_pivot, file = "out/hamcall-licenses-scraped.csv", sep = ",",
@ -167,3 +180,8 @@ write.table(hamcall_table_state, file = "out/hamcall-states-scraped.csv", sep =
append = TRUE, quote = FALSE,
col.names = F, row.names = FALSE,
na = "")
# Append the AE7Q per-state rows to the running CSV. No header row or row names
# are written, values are unquoted, and NA is written as an empty field.
write.table(ae7q_table_state, file = "out/ae7q-states-scraped.csv", sep = ",",
            append = TRUE, quote = FALSE,
            # FALSE is spelled out: `F` is an ordinary variable that can be reassigned
            col.names = FALSE, row.names = FALSE,
            na = "")