# Counts of license actions by date
# Use this file to scrape a series of dates from AE7Q.
#
# For each date in the query window the script downloads the AE7Q
# "ProcessDate" page, counts the rows of the license table per `Action Type`,
# and finally writes one combined CSV with the columns
# date, Action Type, count, source, source_detail.

# Packages that provide the functions used below (read_html/html_table,
# the dplyr verbs, and fill). Attaching an already-attached package is a no-op.
library(rvest)
library(dplyr)
library(tidyr)

# Set start and end date
date_vec <- seq(as.Date("2004-11-29"), as.Date("2024-07-22"), by = "days")

# Randomize dates we are querying
date_vec <- sample(date_vec)
dvbackup <- date_vec

# NOTE(review): this slice keeps positions 1687..7176 of the shuffled vector.
# Because sample() is called without a seed, a fresh run produces a different
# permutation, so this only resumes an earlier run if `dvbackup` from that
# session is reused -- confirm before re-running.
date_vec <- date_vec[1687:7176]

# Space-joined column names that identify the license table on a page.
# Built once here because it does not change between iterations.
expected_header <- paste(
  c(
    "Callsign", "Region/ State", "Entity Name", "Applicant Type",
    "Licensee Class", "License Status", "Action Type"
  ),
  collapse = " "
)

# Create the output directory up front so the final write cannot fail on a
# missing folder after the whole scrape has finished.
out_path <- "out/ae7q-actions-scraped03.csv"
dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)

# One slot per date; slots that are never filled stay NULL and are ignored by
# bind_rows() below.
ae7q_list <- vector("list", length(date_vec))

for (i in seq_along(date_vec)) {
  ae7q_new_url <- paste0(
    "https://www.ae7q.com/query/list/ProcessDate.php?DATE=",
    date_vec[i]
  )
  print(ae7q_new_url)

  # Read the page. A failed request is reported and skipped instead of
  # aborting the loop and discarding everything collected so far.
  ae7q_new_raw <- tryCatch(
    read_html(ae7q_new_url),
    error = function(e) {
      warning(
        "Failed to read ", ae7q_new_url, ": ", conditionMessage(e),
        call. = FALSE, immediate. = TRUE
      )
      NULL
    }
  )

  if (!is.null(ae7q_new_raw)) {
    no_grants <- grepl(
      "No license grants found issued on",
      ae7q_new_raw %>% html_text(),
      fixed = TRUE
    )

    if (no_grants) {
      # Placeholder row for a date without grants. A tibble keeps the column
      # name `Action Type` intact (data.frame() would rename it to
      # `Action.Type`), so it lines up with the scraped rows in bind_rows().
      ae7q_list[[i]] <- tibble(
        date = date_vec[i],
        `Action Type` = NA_character_,
        count = NA_integer_,
        source = "AE7Q",
        source_detail = ae7q_new_url
      )
    } else {
      # Get every table on the page
      ae7q_new_tables <- ae7q_new_raw %>%
        html_elements(xpath = "//table") %>%
        html_table()

      # Find the right table by its column names
      table_headers <- vapply(
        ae7q_new_tables,
        function(x) paste(names(x), collapse = " "),
        character(1)
      )
      right_table_id <- grep(expected_header, table_headers, fixed = TRUE)

      if (length(right_table_id) == 0L) {
        # Neither the "no grants" notice nor the expected table was found;
        # record nothing for this date rather than inventing a result.
        warning(
          "No license table found at ", ae7q_new_url,
          call. = FALSE, immediate. = TRUE
        )
      } else {
        # If several tables match, use the first one.
        ae7q_table_new <- ae7q_new_tables[[right_table_id[1]]]

        ae7q_list[[i]] <- ae7q_table_new %>%
          # Cells holding only a double quote are turned into NA and then
          # filled from the row above. case_when() is used instead of na_if()
          # so the comparison also works on non-character columns.
          mutate(across(everything(), ~ case_when(. == "\"" ~ NA, TRUE ~ .))) %>%
          fill(everything()) %>%
          group_by(`Action Type`) %>%
          summarize(count = n(), .groups = "drop") %>%
          mutate(
            date = date_vec[i],
            source = "AE7Q",
            source_detail = ae7q_new_url
          ) %>%
          relocate(date)
      }
    }
  }

  # Wait for a random whole number of seconds between 1 and 5
  Sys.sleep(sample(1:5, 1))
}

# Combine all the data and sort by date
a <- bind_rows(ae7q_list) %>%
  arrange(date) %>%
  filter(!is.na(date))

write.csv(a, out_path, row.names = FALSE)