2024-09-21 19:18:31 -05:00
|
|
|
|
|
|
|
# Counts of license actions by date
|
|
|
|
# Use this file to scrape a series of dates from AE7Q
|
|
|
|
|
2024-09-21 22:13:52 -05:00
|
|
|
# Set start and end date: one entry per calendar day, both ends inclusive
date_vec <- seq(as.Date("2004-11-29"), as.Date("2024-07-22"), by = "days")

# Randomize dates we are querying
# NOTE(review): no set.seed() is visible in this file, so the shuffled order
# differs between runs; the skip count below only lines up with the shuffle
# produced in the same session -- confirm before re-running from scratch.
date_vec <- sample(date_vec)

# Keep an untouched copy of the full shuffled vector
dvbackup <- date_vec

# Drop the first n_skip shuffled dates and keep everything after them
# (presumably those were already scraped in an earlier run -- TODO confirm).
# A position mask is used instead of a hard-coded end index so that changing
# the date range above can never index past the end and introduce NA dates.
n_skip <- 1686L
date_vec <- date_vec[seq_along(date_vec) > n_skip]
|
2024-09-22 10:17:37 -05:00
|
|
|
|
2024-09-21 19:18:31 -05:00
|
|
|
# Scrape one AE7Q "ProcessDate" page per entry of date_vec and store a small
# table of license-action counts for that date in ae7q_list.
#
# After the loop, element i of ae7q_list is one of:
#   * a table with columns date, `Action Type`, count, source, source_detail
#     (one row per action type listed on that day's page);
#   * a one-row placeholder with NA `Action Type` and count, when the page
#     reports that no license grants were found for that date;
#   * NULL, when the page could not be read or the license table could not be
#     identified unambiguously (a warning names the URL).

# Column headers, in page order, that identify the license table
ae7q_expected_cols <- paste(c("Callsign",
                              "Region/ State",
                              "Entity Name",
                              "Applicant Type",
                              "Licensee Class",
                              "License Status",
                              "Action Type"), collapse = " ")

# Preallocate one slot per date instead of growing the list in the loop
ae7q_list <- vector("list", length(date_vec))

# seq_along() so that an empty date_vec runs zero iterations
for (i in seq_along(date_vec)) {
  ae7q_new_url <- paste0(
    "https://www.ae7q.com/query/list/ProcessDate.php?DATE=",
    date_vec[i]
  )
  print(ae7q_new_url)

  # Read the page; a failed request is reported and skipped rather than
  # aborting the whole run
  ae7q_new_raw <- tryCatch(
    read_html(ae7q_new_url),
    error = function(e) {
      warning("Could not read ", ae7q_new_url, ": ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      NULL
    }
  )

  if (is.null(ae7q_new_raw)) {
    # Nothing to record; slot i stays NULL
  } else if (grepl("No license grants found issued on",
                   ae7q_new_raw %>% html_text())) {
    # Page says there is no license table: record a placeholder row.
    # check.names = FALSE keeps the column called "Action Type" (with the
    # space) so it lines up with the scraped tables when rows are combined.
    ae7q_list[[i]] <- data.frame("date" = date_vec[i],
                                 "Action Type" = NA,
                                 "count" = NA,
                                 "source" = "AE7Q",
                                 "source_detail" = ae7q_new_url,
                                 check.names = FALSE)
  } else {
    # Get tables and clean up
    ae7q_new_tables <- ae7q_new_raw %>%
      html_elements(xpath = "//table") %>%
      html_table()

    # Find the right table by the column names
    table_headers <- vapply(ae7q_new_tables,
                            function(x) paste(names(x), collapse = " "),
                            character(1))
    right_table_id <- grep(ae7q_expected_cols, table_headers, fixed = TRUE)

    if (length(right_table_id) != 1L) {
      # Zero or several candidate tables: indexing with [[ ]] would either
      # fail or silently pick a column, so report and skip this date
      warning("Expected exactly one license table at ", ae7q_new_url,
              " but found ", length(right_table_id),
              call. = FALSE, immediate. = TRUE)
    } else {
      ae7q_table_new <- ae7q_new_tables[[right_table_id]]

      ae7q_list[[i]] <- ae7q_table_new %>%
        #mutate(across(everything(), ~na_if(., "\""))) %>%
        # Cells holding only a double-quote character (presumably ditto
        # marks) become NA and are then filled from the row above
        mutate(across(everything(),
                      ~ case_when(. == "\"" ~ NA,
                                  TRUE ~ .))) %>%
        fill(everything()) %>%
        # Rows per action type, returned ungrouped
        count(`Action Type`, name = "count") %>%
        mutate(date = date_vec[i],
               source = "AE7Q", source_detail = ae7q_new_url) %>%
        relocate(date)
    }
  }

  # Wait for a random whole number of seconds between 1 and 5
  Sys.sleep(sample(1:5, 1))
}
|
|
|
|
|
2024-09-21 22:13:52 -05:00
|
|
|
# Combine all the data and sort by date, dropping any rows without a date
a <- bind_rows(ae7q_list) %>%
  arrange(date) %>%
  filter(!is.na(date))

# Write the combined table; create the output directory first so the write
# cannot fail just because "out/" does not exist yet
out_path <- "out/ae7q-actions-scraped03.csv"
dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)
write.csv(a, out_path, row.names = FALSE)
|
2024-09-21 19:18:31 -05:00
|
|
|
|
|
|
|
|
|
|
|
|