ham-radio-licenses/bin/scrape-ae7q-mass.R
2024-09-21 19:18:31 -05:00

56 lines
2.0 KiB
R

# Counts of license actions by date
#
# Use this file to scrape a series of dates from AE7Q.
# For every day in `date_vec` the AE7Q "ProcessDate" page is downloaded, the
# license table is located by its column names, and its rows are counted by
# `Action Type`. Days for which the page reports no license grants get a
# single placeholder row with NA for `Action Type` and `count`. All per-day
# results are stacked and written to out/ae7q-actions-scraped.csv.
#
# NOTE(review): read_html()/html_elements()/html_table()/html_text(), the
# dplyr verbs and fill() are used without any library() call in this file;
# presumably rvest, dplyr and tidyr are attached by the caller -- confirm.
date_vec <- seq(as.Date("2024-08-01"), as.Date("2024-09-21"), by = "days")

# Column names that identify the license table on the page.
expected_cols <- c(
  "Callsign",
  "Region/ State",
  "Entity Name",
  "Applicant Type",
  "Licensee Class",
  "License Status",
  "Action Type"
)

# Preallocate one slot per date instead of growing the list in the loop.
ae7q_list <- vector("list", length(date_vec))

for (i in seq_along(date_vec)) {
  ae7q_new_url <- paste0(
    "https://www.ae7q.com/query/list/ProcessDate.php?DATE=",
    date_vec[i]
  )
  print(ae7q_new_url)

  # Read the page
  ae7q_new_raw <- read_html(ae7q_new_url)

  # Make sure the new license table exists first
  if (!grepl("No license grants found issued on", ae7q_new_raw %>% html_text())) {
    # Get tables and clean up
    ae7q_new_tables <- ae7q_new_raw %>%
      html_elements(xpath = "//table") %>%
      html_table()

    # Find the right table by the column names. grep() only honours the first
    # element of a pattern vector, so count how many of the expected column
    # names each table actually carries and take the best match.
    match_counts <- vapply(
      ae7q_new_tables,
      function(tbl) sum(expected_cols %in% names(tbl)),
      integer(1)
    )
    if (length(match_counts) == 0L || max(match_counts) == 0L) {
      stop("No license table found on ", ae7q_new_url, call. = FALSE)
    }
    right_table_id <- which.max(match_counts)
    ae7q_table_new <- ae7q_new_tables[[right_table_id]]
    if (!("Action Type" %in% names(ae7q_table_new))) {
      stop(
        "License table on ", ae7q_new_url, " has no 'Action Type' column",
        call. = FALSE
      )
    }

    ae7q_list[[i]] <- ae7q_table_new %>%
      # A lone double quote is turned into NA so that fill() below carries
      # the previous row's value down. Restricted to character columns:
      # other column types cannot hold that string, and na_if() may refuse
      # to compare them with a character value.
      mutate(across(where(is.character), ~ na_if(.x, "\""))) %>%
      fill(everything()) %>%
      group_by(`Action Type`) %>%
      # Drop the grouping so the stacked result is a plain, ungrouped table.
      summarize(count = n(), .groups = "drop") %>%
      mutate(
        date = date_vec[i],
        source = "AE7Q",
        source_detail = ae7q_new_url
      ) %>%
      relocate(date)
  } else {
    # check.names = FALSE keeps the column literally named "Action Type";
    # otherwise data.frame() renames it to "Action.Type" and bind_rows()
    # produces a second, stray column.
    ae7q_list[[i]] <- data.frame(
      "date" = date_vec[i],
      "Action Type" = NA_character_,
      "count" = NA_integer_,
      "source" = "AE7Q",
      "source_detail" = ae7q_new_url,
      check.names = FALSE
    )
  }
}

a <- bind_rows(ae7q_list)
write.csv(a, "out/ae7q-actions-scraped.csv", row.names = FALSE)