More work on AE7Q.

This commit is contained in:
mattbk 2024-09-22 10:17:37 -05:00
parent 419d39c569
commit 618ac4eff7
3 changed files with 3111 additions and 20 deletions

View File

@ -43,7 +43,10 @@ city <- city_raw %>% mutate(Date = as.Date(Date),
#### License Actions #### #### License Actions ####
ae7q_actions <- read.csv("data/ae7q-actions-scraped.csv") %>% ae7q_actions <- read.csv("data/ae7q-actions-scraped.csv") %>%
mutate(date = as.Date(date)) %>% mutate(date = as.Date(date)) %>%
filter(!is.na(action)) filter(!is.na(action)) %>%
arrange(date) %>%
group_by(action) %>%
mutate(mean_30 = rollmean(count, k=30, fill=NA, align='right'))
#### Plots #### #### Plots ####
@ -295,16 +298,21 @@ ggplot(data = ae7q_actions,
y = count, y = count,
color = action)) + color = action)) +
geom_line() + geom_line() +
scale_x_date(date_breaks = "1 month", geom_line(data = ae7q_actions,
date_minor_breaks = "1 week", aes(x = date,
date_labels = "%Y-%m-%d") + y = mean_30),
color = "black") +
scale_x_date(date_breaks = "5 years",
date_minor_breaks = "1 year",
date_labels = "%Y") +
facet_wrap(~action, facet_wrap(~action,
scales = "free_y") + scales = "free_y") +
theme_bw() + theme_bw() +
labs(title = "US Amateur License Actions", labs(title = "US Amateur License Actions",
subtitle = "with 30-day rolling mean",
y = "Count", y = "Count",
x = "Date", x = "Date",
caption = "w1cdn.net", caption = "w1cdn.net; source: ae7q.com",
color = "Action") + color = "Action") +
guides(color = "none") guides(color = "none")

View File

@ -3,10 +3,13 @@
# Use this file to scrape a series of dates from AE7Q # Use this file to scrape a series of dates from AE7Q
# Set start and end date # Set start and end date
date_vec <- seq(as.Date("2024-07-23"), as.Date("2024-07-31"), by="days") date_vec <- seq(as.Date("2004-11-29"), as.Date("2024-07-22"), by="days")
# Randomize dates we are querying # Randomize dates we are querying
date_vec <- sample(date_vec) date_vec <- sample(date_vec)
dvbackup <- date_vec
date_vec <- date_vec[1177:7176]
ae7q_list <- list() ae7q_list <- list()
for(i in 1:length(date_vec)){ for(i in 1:length(date_vec)){
ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=", date_vec[i]) ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=", date_vec[i])
@ -23,14 +26,15 @@ for(i in 1:length(date_vec)){
html_table() html_table()
# Find the right table by the column names # Find the right table by the column names
right_table_id <- grep(c("Callsign", right_table_id <- grep(paste(c("Callsign",
"Region/ State", "Region/ State",
"Entity Name", "Entity Name",
"Applicant Type", "Applicant Type",
"Licensee Class", "Licensee Class",
"License Status", "License Status",
"Action Type"), "Action Type"), collapse = " "),
lapply(ae7q_new_tables, names)) lapply(ae7q_new_tables, function(x) paste(names(x), collapse = " ")))
ae7q_table_new <- ae7q_new_tables[[right_table_id]] ae7q_table_new <- ae7q_new_tables[[right_table_id]]
@ -43,18 +47,18 @@ for(i in 1:length(date_vec)){
relocate(date) relocate(date)
} else { } else {
ae7q_list[[i]]<- data.frame("date" = date_vec[i], ae7q_list[[i]]<- data.frame("date" = date_vec[i],
"Action Type" = NA, "Action Type" = NA,
"count" = NA, "count" = NA,
"source" = "AE7Q", "source" = "AE7Q",
"source_detail" = ae7q_new_url) "source_detail" = ae7q_new_url)
} }
# Wait for random time up to 10 seconds # Wait for random time up to 5 seconds
Sys.sleep(sample(1:10, 1)) Sys.sleep(sample(1:5, 1))
} }
# Combine all the data and sort by date # Combine all the data and sort by date
a <- bind_rows(ae7q_list) %>% a <- bind_rows(ae7q_list) %>%
arrange(date) arrange(date)
write.csv(a, "out/ae7q-actions-scraped.csv", row.names = F) write.csv(a, "out/ae7q-actions-scraped.csv", row.names = F)

File diff suppressed because it is too large Load Diff