213 lines
8.3 KiB
R
213 lines
8.3 KiB
R
|
|
library(rvest)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
|
|
# sudo crontab -e
|
|
# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-license-counts.R">/dev/null 2>&1
|
|
|
|
###### ARRL ######
# Page that publishes the ARRL licence counts.
arrl_url <- "https://www.arrl.org/fcc-license-counts"

# Download and parse the page once; both the date and the table are read
# from this parsed document.
d_raw <- read_html(arrl_url)

# Text of the <em> node addressed by the absolute XPath below.
arrl_date_text <- html_text(
  html_nodes(
    d_raw,
    xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/p[2]/em"
  )
)

# Strip everything up to and including the last ", ", then parse the
# remainder as day-abbreviated month-year (%b is matched against the
# current locale's month names).
date_raw <- as.Date(gsub(".*, ", "", arrl_date_text), format = "%d-%b-%Y")
|
|
|
|
# Extract the counts table: first table matched by the absolute XPath.
arrl_counts <- html_table(
  html_nodes(
    d_raw,
    xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/table"
  )
)[[1]]

# Build the row(s) to append:
#   * the scrape date goes in as the first column,
#   * an empty "Tech-Plus" column is inserted just before General for
#     compatibility,
#   * seven empty placeholder columns (a-g) and the source columns follow.
tab <- bind_cols(Date = date_raw, arrl_counts) %>%
  mutate("Tech-Plus" = NA, .before = General) %>%
  mutate(
    a = NA,
    b = NA,
    c = NA,
    d = NA,
    e = NA,
    f = NA,
    g = NA,
    source_name = "ARRL FCC License Counts",
    source_detail = "http://www.arrl.org/fcc-license-counts"
  )
|
|
|
|
# Append the ARRL row(s) to the running CSV.
# No header line and no row names are written, fields are not quoted, and
# NA is written as an empty field.
write.table(tab, file = "out/arrl-fcc-licenses-scraped.csv", sep = ",",
            append = TRUE, quote = FALSE,
            # Spelled out as FALSE: `F` is an ordinary variable that can be
            # reassigned.
            col.names = FALSE, row.names = FALSE,
            na = "")
|
|
|
|
# Clean up table to remove any duplicates (sometimes the page isn't updated regularly)
|
|
# db <- read.csv("out/arrl-fcc-licenses-scraped.csv")
|
|
# db2 <- db %>% distinct(.keep_all = TRUE) %>%
|
|
# filter(Date != "Date")
|
|
# write.csv(db2, "out/arrl-fcc-licenses-scraped.csv",
|
|
# quote = F,
|
|
# row.names = F,
|
|
# na = "")
|
|
|
|
###### HamCall ######
hamcall_url <- "https://hamcall.net/hamcallcounts.html"

# Download and parse the page.
hamcall_raw <- read_html(hamcall_url)

# The date is not selected with an XPath; it is cut out of the page's full
# text instead. Everything up to and including the "All counts current as
# of " marker is removed, then everything from the blank lines preceding
# "All Current US Hams" onwards.
hamcall_page_text <- as.character(html_text(hamcall_raw))
hamcall_date_text <- gsub(
  ".*All counts current as of ", "", hamcall_page_text
)
hamcall_date_text <- gsub(
  "\n\r\n\r\nAll Current US Hams.*", "", hamcall_date_text
)

# Parsed with as.Date()'s default formats.
hamcall_date <- as.Date(hamcall_date_text)
|
|
|
|
# Parse every <table> on the page into a list of data frames.
hamcall_tables <- html_table(html_elements(hamcall_raw, xpath = "//table"))

# Tables are picked by their position on the page, so a layout change on
# the site will silently shift these.
hamcall_table_all_hams_raw <- hamcall_tables[[2]]  # overall totals
hamcall_table_class_raw    <- hamcall_tables[[3]]  # counts per class
hamcall_table_city_raw     <- hamcall_tables[[4]]  # counts per city
hamcall_table_state_raw    <- hamcall_tables[[5]]  # counts per state
|
|
|
|
# One wide "TOTAL" row: a column per Class value plus the overall totals.
hamcall_table_class_pivot <- hamcall_table_class_raw %>%
  # One column per value of Class, filled from Count.
  pivot_wider(names_from = "Class", values_from = "Count") %>%
  mutate(
    date = hamcall_date,
    state = "TOTAL",
    techplus = NA,
    # Row 1, column 2 of the overall-totals table.
    total = pull(hamcall_table_all_hams_raw[1, 2])
  ) %>%
  # Keep exactly these nine columns, in this order. N, T, G, A and E are
  # columns created by the pivot (presumably the single-letter class codes).
  select(date, state, N, T, techplus, G, A, E, total) %>%
  # Trailing columns: empty placeholders interleaved with the club and
  # military counts (rows 2 and 3 of the overall-totals table), then source.
  mutate(
    a = NA,
    b = NA,
    club = pull(hamcall_table_all_hams_raw[2, 2]),
    military = pull(hamcall_table_all_hams_raw[3, 2]),
    c = NA,
    d = NA,
    e = NA,
    source_name = "HamCall",
    source_detail = hamcall_url
  )
|
|
|
|
# City counts: stamp each row with the page date and source, date first.
hamcall_table_city <- relocate(
  mutate(
    hamcall_table_city_raw,
    date = hamcall_date,
    source_name = "HamCall",
    source_detail = hamcall_url
  ),
  date
)

# State counts: same stamping, with the leading columns put in a fixed order.
hamcall_table_state <- relocate(
  mutate(
    hamcall_table_state_raw,
    date = hamcall_date,
    source_name = "HamCall",
    source_detail = hamcall_url
  ),
  date, State, Count, source_name, source_detail
)
|
|
|
|
# https://stackoverflow.com/questions/5411979/state-name-to-abbreviation
# Lookup table from full name to postal code: the 50 states from the
# built-in state.name / state.abb vectors, followed by the extra entries
# listed here, in this order.
extra_state_codes <- c(
  "District of Columbia"     = "DC",
  "Armed Forces America"     = "AA",
  "American Samoa"           = "AS",
  "Armed Forces Pacific"     = "AP",
  "Armed Forces Europe"      = "AE",
  "Virgin Islands"           = "VI",
  "Guam"                     = "GU",
  "Northern Mariana Islands" = "MP",
  "Puerto Rico"              = "PR"
)

state_codes <- tibble(
  state = c(state.name, names(extra_state_codes)),
  code = c(state.abb, unname(extra_state_codes))
)
|
|
|
|
# Attach the full state name to each row by matching the scraped State
# value against state_codes$code. Rows whose code is not in the lookup keep
# their counts but get an NA `state` (left join).
hamcall_table_state <- left_join(hamcall_table_state, state_codes,
                                 by = join_by(State == code),
                                 # Spelled out as FALSE: `F` is an ordinary,
                                 # reassignable variable.
                                 keep = FALSE) %>%
  # Empty placeholder columns; presumably these line the rows up with the
  # column layout of the existing CSV -- confirm against the output file.
  mutate(a = NA, b = NA, c = NA, d = NA, e = NA, f = NA,
         g = NA, h = NA, i = NA, j = NA, k = NA, l = NA, m = NA) %>%
  # The code column is replaced by the full name.
  select(-State) %>%
  relocate(state, .after = date) %>%
  relocate(Count, .after = f) %>%
  relocate(source_name:source_detail, .after = m)
|
|
|
|
|
|
###### AE7Q States ######
ae7q_url <- "https://www.ae7q.com/query/stat/LicenseUSA.php"

# Download and parse the page.
ae7q_raw <- read_html(ae7q_url)

# Parse every <table> on the page into a list of data frames.
ae7q_tables <- ae7q_raw %>%
  html_elements(xpath = "//table") %>%
  html_table()

# The table used here is picked by position (20th on the page), so a layout
# change on the site will silently shift it.
ae7q_table_state_raw <- ae7q_tables[[20]]

# The real header is the first data row: promote it to column names and
# drop it. The row is flattened to a plain character vector first so that
# `names<-` receives a character vector rather than a one-row data frame.
names(ae7q_table_state_raw) <- as.character(
  unlist(ae7q_table_state_raw[1, ], use.names = FALSE)
)
ae7q_table_state_raw <- ae7q_table_state_raw[-1, ]
|
|
|
|
# Step 1: strip the parenthesised annotations (the percentages) from every
# value. The table is made long, cleaned in one column, and widened back
# with "State or Territory" as the row key.
ae7q_counts_clean <- ae7q_table_state_raw %>%
  pivot_longer(cols = -"State or Territory") %>%
  mutate(value = gsub("\\s*\\([^\\)]+\\)", "", value)) %>%
  pivot_wider(id_cols = "State or Territory")

# Step 2: split "State or Territory" on " - " into code and name. Rows with
# no separator get an NA name (fill = "right").
ae7q_states_split <- separate(
  ae7q_counts_clean,
  `State or Territory`,
  into = c("state_code", "state_name"),
  sep = " - ",
  fill = "right"
)

# Step 3: label the two special rows, drop the helper columns, add the date
# and empty placeholder columns, and put the leading columns in order.
ae7q_table_state <- ae7q_states_split %>%
  mutate(
    state_name = case_when(
      state_code == "-" ~ "Other*",
      state_code == "Totals" ~ "TOTAL",
      TRUE ~ state_name
    )
  ) %>%
  select(-c(GeoRegion, state_code)) %>%
  mutate(
    # This page is stamped with the run date, not a date read from the page.
    date = Sys.Date(),
    ttp = NA,
    conditional = NA,
    military = NA,
    multiple = NA,
    repeater = NA,
    gmrs = NA,
    source = "AE7Q",
    source_detail = ae7q_url
  ) %>%
  relocate(
    date, state_name, Novice, Technician, TechnicianPlus,
    General, Advanced, AmateurExtra, Total, ttp, conditional,
    Club
  )
|
|
|
|
###### AE7Q New Licenses ######
# Query the page for today's processing date (Sys.Date() is pasted in as
# YYYY-MM-DD).
ae7q_new_url <- paste0(
  "https://www.ae7q.com/query/list/ProcessDate.php?DATE=", Sys.Date()
)

# Download and parse the page.
ae7q_new_raw <- read_html(ae7q_new_url)

# Only summarise when the page does not report that there were no grants.
if (!grepl("No license grants found issued on", html_text(ae7q_new_raw))) {
  # Parse every <table> on the page into a list of data frames.
  ae7q_new_tables <- ae7q_new_raw %>%
    html_elements(xpath = "//table") %>%
    html_table()

  # Picked by position (14th table on the page).
  ae7q_table_new <- ae7q_new_tables[[14]]

  ae7q_sum01 <- ae7q_table_new %>%
    # A cell holding just a double quote (presumably a ditto mark) becomes
    # NA so fill() can carry the value above it down. Restricted to
    # character columns: na_if() casts "\"" to the column's type and errors
    # on numeric or logical columns.
    mutate(across(where(is.character), ~ na_if(.x, "\""))) %>%
    fill(everything()) %>%
    group_by(`Applicant Type`,
             `Licensee Class`,
             `License Status`,
             `Action Type`) %>%
    # Row count per combination; the result stays grouped (.groups = "keep").
    summarize(count = n(), .groups = "keep")
  # NOTE(review): ae7q_sum01 is not written out anywhere in the visible
  # part of this script -- confirm whether that is intended.
}
|
|
|
|
|
|
##### Append tables #####

# Append `tbl` to the CSV file at `path`.
# No header line and no row names are written, fields are not quoted, and
# NA is written as an empty field. Returns NULL invisibly.
append_csv <- function(tbl, path) {
  write.table(tbl, file = path, sep = ",",
              append = TRUE, quote = FALSE,
              # Spelled out as FALSE: `F` is an ordinary variable that can
              # be reassigned.
              col.names = FALSE, row.names = FALSE,
              na = "")
  invisible(NULL)
}

append_csv(hamcall_table_class_pivot, "out/hamcall-licenses-scraped.csv")
append_csv(hamcall_table_city, "out/hamcall-cities-scraped.csv")
append_csv(hamcall_table_state, "out/hamcall-states-scraped.csv")
append_csv(ae7q_table_state, "out/ae7q-states-scraped.csv")
|