Compare commits

...

19 Commits

Author SHA1 Message Date
fe544140e6 Update data and plots. 2025-08-28 20:12:48 -05:00
3c43e62428 Update data and plots. 2025-02-24 21:14:50 -06:00
3f17d75ad2 Add data and update plots. 2024-12-28 14:12:54 -06:00
172ba6e76c Fix scrape date. 2024-11-29 09:36:26 -06:00
19ceeb55d8 Update actions data and plots. 2024-11-28 10:43:23 -06:00
1b58d335f8 Scrape AE7Q actions from *previous* day, not current day. 2024-11-28 10:22:15 -06:00
113b3d30e8 Add data and update plots. 2024-11-24 10:11:07 -06:00
8fbb0504c6 Add new data and make plots. 2024-10-26 14:23:00 -05:00
1a8ec40550 Update plot scaling. 2024-10-02 19:03:31 -05:00
82c93e6689 Update data and plots. 2024-10-02 19:01:29 -05:00
9127b43f27 Try to fix AE7Q action counts. 2024-09-29 18:10:44 -05:00
312ab95578 Update license action data and plots. 2024-09-23 09:08:30 -05:00
072af64293 Fix error handling in scrape. 2024-09-23 02:47:30 +00:00
618ac4eff7 More work on AE7Q. 2024-09-22 10:17:37 -05:00
419d39c569 Fix up actions plot, add to readme, add a little more data. 2024-09-21 22:13:52 -05:00
81e5fbef6c Stub out AE7Q actions work. 2024-09-21 19:18:31 -05:00
beeb68040f Snapshot while working on scraping. 2024-09-21 13:19:31 -05:00
9f46e49b10 Add data and update plots. 2024-09-20 21:30:52 -05:00
6177bd0b6d Scrape AE7Q states. 2024-09-20 20:36:57 -05:00
20 changed files with 58251 additions and 11 deletions

View File

@ -8,6 +8,8 @@ This is a work in progress. It is not intended to be immediately useful for
detailed analysis, but to act as a guide for further investigation. As we figure out detailed analysis, but to act as a guide for further investigation. As we figure out
how to slice up and caveat data, it will get more organized. how to slice up and caveat data, it will get more organized.
There may be errors! If something looks amiss, question it!
# Resources (not all integrated yet) # Resources (not all integrated yet)
- Various items in the [issue queue](https://amiok.net/gitea/W1CDN/ham-radio-licenses/issues) - Various items in the [issue queue](https://amiok.net/gitea/W1CDN/ham-radio-licenses/issues)
- ARRL FCC counts: https://web.archive.org/web/20150905095114/ - ARRL FCC counts: https://web.archive.org/web/20150905095114/
@ -52,3 +54,6 @@ All the data in these plots is in https://amiok.net/gitea/W1CDN/ham-radio-licens
![Alt text](plots/states-over-time-freey.png) ![Alt text](plots/states-over-time-freey.png)
![Alt text](plots/cities-over-time.png) ![Alt text](plots/cities-over-time.png)
![Alt text](plots/cities-over-time-freey.png) ![Alt text](plots/cities-over-time-freey.png)
![Alt text](plots/actions-over-time.png)
![Alt text](plots/actions-over-time-2-years.png)

View File

@ -8,6 +8,7 @@ library(plotly)
library(htmlwidgets) library(htmlwidgets)
library(lubridate) library(lubridate)
library(ggrepel) library(ggrepel)
library(zoo)
#### Total/State/Class #### #### Total/State/Class ####
# Read in total/state/class data # Read in total/state/class data
@ -40,6 +41,18 @@ city_raw <- read.csv("data/us cities ham radio licenses over time.csv")
city <- city_raw %>% mutate(Date = as.Date(Date), city <- city_raw %>% mutate(Date = as.Date(Date),
city_label = paste0(City, ", ", State)) city_label = paste0(City, ", ", State))
#### License Actions ####
# Read the scraped AE7Q action counts, drop rows with no action, and add a
# rolling mean of `count` within each action type.
ae7q_actions <- read.csv("data/ae7q-actions-scraped.csv") %>%
  mutate(date = as.Date(date)) %>%
  filter(!is.na(action)) %>%
  arrange(date) %>%
  group_by(action) %>%
  # Right-aligned window: the current row plus the 29 rows before it; the
  # first 29 rows of each action are filled with NA.
  # NOTE(review): the window counts rows, not calendar days, so this is only a
  # true 30-day mean if every date is present for an action (see check below).
  mutate(mean_30 = rollmean(count, k = 30, fill = NA, align = "right")) %>%
  # Drop the grouping so later verbs on ae7q_actions are not silently
  # evaluated per action
  ungroup()

# Make sure all the dates are there
#date_vec <- seq(min(ae7q_actions$date), max(ae7q_actions$date), by="days")
#all(date_vec == unique(ae7q_actions$date))
#### Plots #### #### Plots ####
##### Total over time, y = 0 ##### ##### Total over time, y = 0 #####
@ -283,3 +296,55 @@ ggplot(data = city,
theme(legend.position="bottom") theme(legend.position="bottom")
ggsave("plots/cities-over-time-freey.png", width = 15, height = 9) ggsave("plots/cities-over-time-freey.png", width = 15, height = 9)
##### Actions Over Time #####
# Daily counts for every action type over the whole data set. Each action gets
# its own panel with an independent y-axis, and the rolling mean (mean_30) is
# overlaid in black. The color legend is hidden because the panel titles
# already name the action.
ggplot(ae7q_actions, aes(x = date, y = count, color = action)) +
  geom_line() +
  # Data and x are inherited from the plot; only y differs for the overlay
  geom_line(aes(y = mean_30), color = "black") +
  facet_wrap(~action, scales = "free_y") +
  scale_x_date(date_labels = "%Y",
               date_breaks = "5 years",
               date_minor_breaks = "1 year") +
  labs(title = "US Amateur License Actions",
       subtitle = "with 30-day rolling mean",
       x = "Date",
       y = "Count",
       color = "Action",
       caption = "w1cdn.net; source: ae7q.com") +
  theme_bw() +
  guides(color = "none")

# No plot argument: ggsave() writes the plot that was just displayed
ggsave("plots/actions-over-time.png", width = 6, height = 4)
##### Actions Over Time, last two years #####
# Daily counts per action type, restricted to the two years before today,
# with the rolling mean (mean_30) overlaid in black.

# %m-% rolls back to the last valid day of the month, so the cutoff is still a
# real date when this runs on Feb 29. Plain `Sys.Date() - years(2)` is NA in
# that case, which would drop every row and print "NA" in the title.
actions_cutoff <- Sys.Date() %m-% years(2)

# Filter once and reuse for both line layers
ae7q_actions_recent <- ae7q_actions %>%
  filter(date >= actions_cutoff)

ggplot(data = ae7q_actions_recent,
       aes(x = date,
           y = count,
           color = action)) +
  geom_line() +
  # Data and x are inherited from the plot; only y differs for the overlay
  geom_line(aes(y = mean_30),
            color = "black") +
  scale_x_date(date_breaks = "6 months",
               date_minor_breaks = "1 months",
               date_labels = "%Y-%m") +
  facet_wrap(~action,
             scales = "free_y") +
  theme_bw() +
  labs(title = paste0("US Amateur License Actions since ", actions_cutoff),
       subtitle = "with 30-day rolling mean",
       y = "Count",
       x = "Date",
       caption = "w1cdn.net; source: ae7q.com",
       color = "Action") +
  # Panel titles already name the action, so hide the color legend
  guides(color = "none")

ggsave("plots/actions-over-time-2-years.png", width = 9, height = 6)

70
bin/scrape-ae7q-mass.R Normal file
View File

@ -0,0 +1,70 @@
# Counts of license actions by date
# Use this file to scrape a series of dates from AE7Q
# NOTE(review): this file has no library() calls of its own. It uses
# read_html()/html_elements()/html_table(), %>%, and dplyr/tidyr verbs, so
# those packages presumably need to be attached before running -- confirm.

# Set start and end date
date_vec <- seq(as.Date("2024-09-22"), as.Date("2024-11-26"), by = "days")
# Randomize dates we are querying
date_vec <- sample(date_vec)
dvbackup <- date_vec
#date_vec <- date_vec[1687:7176]

# Column names (in order) that identify the license table on the page
ae7q_table_pattern <- paste(c("Callsign",
                              "Region/ State",
                              "Entity Name",
                              "Applicant Type",
                              "Licensee Class",
                              "License Status",
                              "Action Type"), collapse = " ")

# One summary data frame per queried date
ae7q_list <- vector("list", length(date_vec))

for (i in seq_along(date_vec)) {
  ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=",
                         date_vec[i])
  print(ae7q_new_url)

  # Read the page
  ae7q_new_raw <- read_html(ae7q_new_url)

  # Make sure the new license table exists first
  if (!grepl("No license grants found issued on", ae7q_new_raw %>% html_text())) {
    # Get tables and clean up
    ae7q_new_tables <- ae7q_new_raw %>%
      html_elements(xpath = "//table") %>%
      html_table()

    # Find the right table by the column names
    ae7q_table_names <- vapply(ae7q_new_tables,
                               function(x) paste(names(x), collapse = " "),
                               character(1))
    right_table_id <- grep(ae7q_table_pattern, ae7q_table_names)

    # `[[` with zero matches errors obscurely, and with several matches it
    # silently indexes recursively into the first table, so fail loudly here.
    if (length(right_table_id) != 1) {
      stop("Expected exactly one license table at ", ae7q_new_url,
           " but found ", length(right_table_id), call. = FALSE)
    }
    ae7q_table_new <- ae7q_new_tables[[right_table_id]]

    ae7q_list[[i]] <- ae7q_table_new %>%
      #mutate(across(everything(), ~na_if(., "\""))) %>%
      # Cells holding only a double quote become NA, then are filled
      # downward from the last non-missing value in the column
      mutate(across(everything(),
                    ~replace(., . == "\"", NA))) %>%
      fill(everything()) %>%
      # Count rows per action type for this date
      group_by(`Action Type`) %>%
      summarize(count = n(), .groups = "keep") %>%
      mutate(date = date_vec[i],
             source = "AE7Q", source_detail = ae7q_new_url) %>%
      relocate(date)
  } else {
    # Placeholder row for a date with no grants. check.names = FALSE keeps the
    # column named "Action Type"; otherwise data.frame() renames it to
    # "Action.Type" and bind_rows() below produces a stray extra column.
    ae7q_list[[i]] <- data.frame("date" = date_vec[i],
                                 "Action Type" = NA,
                                 "count" = NA,
                                 "source" = "AE7Q",
                                 "source_detail" = ae7q_new_url,
                                 check.names = FALSE)
  }

  # Wait a random 1 to 5 seconds between requests
  Sys.sleep(sample(1:5, 1))
}

# Combine all the data and sort by date
a <- bind_rows(ae7q_list) %>%
  arrange(date) %>%
  filter(!is.na(date))

write.csv(a, "out/ae7q-actions-scraped03.csv", row.names = FALSE)

View File

@ -6,6 +6,7 @@ library(tidyr)
# sudo crontab -e # sudo crontab -e
# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-license-counts.R">/dev/null 2>&1 # 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-license-counts.R">/dev/null 2>&1
###### ARRL ######
arrl_url <- "https://www.arrl.org/fcc-license-counts" arrl_url <- "https://www.arrl.org/fcc-license-counts"
# Read the page # Read the page
@ -127,7 +128,7 @@ hamcall_table_state <- left_join(hamcall_table_state, state_codes, by = join_by(
relocate(source_name:source_detail, .after = m) relocate(source_name:source_detail, .after = m)
###### AE7Q ###### ###### AE7Q States ######
ae7q_url <- "https://www.ae7q.com/query/stat/LicenseUSA.php" ae7q_url <- "https://www.ae7q.com/query/stat/LicenseUSA.php"
# Read the page # Read the page
@ -137,19 +138,76 @@ ae7q_raw <- read_html(ae7q_url)
ae7q_tables <- ae7q_raw %>% ae7q_tables <- ae7q_raw %>%
html_elements(xpath = "//table") %>% html_elements(xpath = "//table") %>%
html_table() html_table()
ae7q_table_state <- ae7q_tables[[20]] ae7q_table_state_raw <- ae7q_tables[[20]]
# Fix names # Fix names
names(ae7q_table_state) <- ae7q_table_state[1,] names(ae7q_table_state_raw) <- ae7q_table_state_raw[1,]
ae7q_table_state <- ae7q_table_state[-1,] ae7q_table_state_raw <- ae7q_table_state_raw[-1,]
# TODO ae7q_table_state <- ae7q_table_state_raw %>%
# split percents out into other columns (separate_wider_delim() ?) pivot_longer(cols = -"State or Territory") %>%
# etc. # remove percentages
mutate(value = gsub("\\s*\\([^\\)]+\\)", "", value)) %>%
pivot_wider(id_cols = "State or Territory") %>%
# Split states
separate(`State or Territory`,
into = c("state_code", "state_name"),
sep = " - ",
fill = "right") %>%
mutate(state_name = case_when(state_code == "-" ~ "Other*",
state_code == "Totals" ~ "TOTAL",
TRUE ~ state_name)) %>%
# Organize
select(c(-GeoRegion, -state_code)) %>%
mutate(date = Sys.Date(),
ttp=NA, conditional=NA, military=NA, multiple=NA, repeater=NA,
gmrs=NA, source="AE7Q", source_detail=ae7q_url) %>%
relocate(date, state_name, Novice, Technician, TechnicianPlus,
General, Advanced, AmateurExtra, Total, ttp, conditional,
Club)
###### AE7Q License Actions ######
# Summarize the previous day's license actions from AE7Q into ae7q_sum01:
# one row per action type with its count, or a single placeholder row when the
# page reports no grants.

# Compute the queried date once so the URL and the recorded date always agree
ae7q_action_date <- Sys.Date() - 1
ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=",
                       ae7q_action_date)
#ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=2024-11-01")

# Read the page
ae7q_new_raw <- read_html(ae7q_new_url)

# Make sure the new license table exists first
if (!grepl("No license grants found issued on", ae7q_new_raw %>% html_text())) {
  # Get tables and clean up
  ae7q_new_tables <- ae7q_new_raw %>%
    html_elements(xpath = "//table") %>%
    html_table()

  # Find the right table by the column names
  ae7q_table_pattern <- paste(c("Callsign",
                                "Region/ State",
                                "Entity Name",
                                "Applicant Type",
                                "Licensee Class",
                                "License Status",
                                "Action Type"), collapse = " ")
  ae7q_table_names <- vapply(ae7q_new_tables,
                             function(x) paste(names(x), collapse = " "),
                             character(1))
  right_table_id <- grep(ae7q_table_pattern, ae7q_table_names)

  # `[[` with zero matches errors obscurely, and with several matches it
  # silently indexes recursively into the first table, so fail loudly here.
  if (length(right_table_id) != 1) {
    stop("Expected exactly one license table at ", ae7q_new_url,
         " but found ", length(right_table_id), call. = FALSE)
  }
  ae7q_table_new <- ae7q_new_tables[[right_table_id]]

  ae7q_sum01 <- ae7q_table_new %>%
    #mutate(across(everything(), ~na_if(., "\""))) %>%
    # Cells holding only a double quote become NA, then are filled
    # downward from the last non-missing value in the column
    mutate(across(everything(),
                  ~replace(., . == "\"", NA))) %>%
    fill(everything()) %>%
    # Count rows per action type
    group_by(`Action Type`) %>%
    summarize(count = n(), .groups = "keep") %>%
    mutate(date = ae7q_action_date,
           source = "AE7Q", source_detail = ae7q_new_url) %>%
    relocate(date)
} else {
  # Placeholder row for a day with no grants. It is dated with the queried day
  # (previously Sys.Date(), one day later than the URL it was scraped from).
  # check.names = FALSE keeps the "Action Type" column name as in the branch
  # above.
  ae7q_sum01 <- data.frame("date" = ae7q_action_date,
                           "Action Type" = NA,
                           "count" = NA,
                           "source" = "AE7Q",
                           "source_detail" = ae7q_new_url,
                           check.names = FALSE)
}
##### Append tables ##### ##### Append tables #####
@ -167,3 +225,13 @@ write.table(hamcall_table_state, file = "out/hamcall-states-scraped.csv", sep =
append = TRUE, quote = FALSE, append = TRUE, quote = FALSE,
col.names = F, row.names = FALSE, col.names = F, row.names = FALSE,
na = "") na = "")
# Append today's AE7Q state table to its CSV: no header, unquoted fields,
# NA written as an empty field.
write.table(ae7q_table_state, file = "out/ae7q-states-scraped.csv", sep = ",",
            append = TRUE, quote = FALSE,
            col.names = FALSE, row.names = FALSE,
            na = "")

# Append the AE7Q license action summary to its CSV in the same format
write.table(ae7q_sum01, file = "out/ae7q-actions-scraped.csv", sep = ",",
            append = TRUE, quote = FALSE,
            col.names = FALSE, row.names = FALSE,
            na = "")

16183
data/ae7q-actions-scraped.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 645 KiB

BIN
plots/actions-over-time.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 212 KiB

After

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 633 KiB

After

Width:  |  Height:  |  Size: 634 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 609 KiB

After

Width:  |  Height:  |  Size: 616 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 179 KiB

After

Width:  |  Height:  |  Size: 156 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 142 KiB

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 183 KiB

After

Width:  |  Height:  |  Size: 188 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 175 KiB

After

Width:  |  Height:  |  Size: 173 KiB