Compare commits

...

25 Commits

Author SHA1 Message Date
3c43e62428 Update data and plots. 2025-02-24 21:14:50 -06:00
3f17d75ad2 Add data and update plots. 2024-12-28 14:12:54 -06:00
172ba6e76c Fix scrape date. 2024-11-29 09:36:26 -06:00
19ceeb55d8 Update actions data and plots. 2024-11-28 10:43:23 -06:00
1b58d335f8 Scrape AE7Q actions from *previous* day, not current day. 2024-11-28 10:22:15 -06:00
113b3d30e8 Add data and update plots. 2024-11-24 10:11:07 -06:00
8fbb0504c6 Add new data and make plots. 2024-10-26 14:23:00 -05:00
1a8ec40550 Update plot scaling. 2024-10-02 19:03:31 -05:00
82c93e6689 Update data and plots. 2024-10-02 19:01:29 -05:00
9127b43f27 Try to fix AE7Q action counts. 2024-09-29 18:10:44 -05:00
312ab95578 Update license action data and plots. 2024-09-23 09:08:30 -05:00
072af64293 Fix error handling in scrape. 2024-09-23 02:47:30 +00:00
618ac4eff7 More work on AE7Q. 2024-09-22 10:17:37 -05:00
419d39c569 Fix up actions plot, add to readme, add a little more data. 2024-09-21 22:13:52 -05:00
81e5fbef6c Stub out AE7Q actions work. 2024-09-21 19:18:31 -05:00
beeb68040f Snapshot while working on scraping. 2024-09-21 13:19:31 -05:00
9f46e49b10 Add data and update plots. 2024-09-20 21:30:52 -05:00
6177bd0b6d Scrape AE7Q states. 2024-09-20 20:36:57 -05:00
8f86f15abe Update data. 2024-09-20 19:53:42 -05:00
ccfc3bd2b8 Fix #132 and update plots. 2024-09-19 21:57:26 -05:00
80f1bc5bc7 Add data and update plots. 2024-09-19 23:35:39 +00:00
e63602b908 Add data and update plots. 2024-09-18 22:15:40 +00:00
aefda00799 Update state plot. 2024-09-18 16:29:06 +00:00
447b148560 Oops. Merge branch 'main' of https://amiok.net/gitea/W1CDN/ham-radio-licenses (conflicts: plots/states-over-time-freey.png) 2024-09-18 16:27:30 +00:00
86f7e9db77 Update plots. 2024-09-18 16:26:51 +00:00
20 changed files with 38801 additions and 61 deletions

View File

@@ -8,6 +8,8 @@ This is a work in progress. It is not intended to be immediately useful for
detailed analysis, but to act as a guide for further investigation. As we figure out
how to slice up and caveat data, it will get more organized.
There may be errors! If something looks amiss, question it!
# Resources (not all integrated yet)
- Various items in the [issue queue](https://amiok.net/gitea/W1CDN/ham-radio-licenses/issues)
- ARRL FCC counts: https://web.archive.org/web/20150905095114/
@@ -52,3 +54,6 @@ All the data in these plots is in https://amiok.net/gitea/W1CDN/ham-radio-licens
![Alt text](plots/states-over-time-freey.png)
![Alt text](plots/cities-over-time.png)
![Alt text](plots/cities-over-time-freey.png)
![Alt text](plots/actions-over-time.png)
![Alt text](plots/actions-over-time-2-years.png)

View File

@@ -8,6 +8,7 @@ library(plotly)
library(htmlwidgets)
library(lubridate)
library(ggrepel)
library(zoo)
#### Total/State/Class ####
# Read in total/state/class data
@@ -40,6 +41,18 @@ city_raw <- read.csv("data/us cities ham radio licenses over time.csv")
city <- city_raw %>% mutate(Date = as.Date(Date),
city_label = paste0(City, ", ", State))
#### License Actions ####
ae7q_actions <- read.csv("data/ae7q-actions-scraped.csv") %>%
mutate(date = as.Date(date)) %>%
filter(!is.na(action)) %>%
arrange(date) %>%
group_by(action) %>%
mutate(mean_30 = rollmean(count, k=30, fill=NA, align='right'))
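# Note: align = "right" with fill = NA makes mean_30 a trailing 30-day average;
# a tiny sketch of the alignment difference on a toy vector:
rollmean(1:5, k = 3, fill = NA, align = "right")   # NA NA  2  3  4
rollmean(1:5, k = 3, fill = NA, align = "center")  # NA  2  3  4 NA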
# Make sure all the dates are there
#date_vec <- seq(min(ae7q_actions$date), max(ae7q_actions$date), by="days")
#all(date_vec == unique(ae7q_actions$date))
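# A runnable version of the check above (a sketch; assumes every calendar day
# between the first and last scrape should appear at least once):
date_vec <- seq(min(ae7q_actions$date), max(ae7q_actions$date), by = "days")
if (!all(date_vec %in% ae7q_actions$date)) warning("ae7q_actions is missing some dates")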
#### Plots ####
##### Total over time, y = 0 #####
@@ -283,3 +296,55 @@ ggplot(data = city,
theme(legend.position="bottom")
ggsave("plots/cities-over-time-freey.png", width = 15, height = 9)
##### Actions Over Time #####
ggplot(data = ae7q_actions,
aes(x = date,
y = count,
color = action)) +
geom_line() +
geom_line(data = ae7q_actions,
aes(x = date,
y = mean_30),
color = "black") +
scale_x_date(date_breaks = "5 years",
date_minor_breaks = "1 year",
date_labels = "%Y") +
facet_wrap(~action,
scales = "free_y") +
theme_bw() +
labs(title = "US Amateur License Actions",
subtitle = "with 30-day rolling mean",
y = "Count",
x = "Date",
caption = "w1cdn.net; source: ae7q.com",
color = "Action") +
guides(color = "none")
ggsave("plots/actions-over-time.png", width = 6, height = 4)
##### Actions Over Time, last two years #####
ggplot(data = ae7q_actions %>% filter(date >= Sys.Date() - years(2)),
aes(x = date,
y = count,
color = action)) +
geom_line() +
geom_line(data = ae7q_actions %>% filter(date >= Sys.Date() - years(2)),
aes(x = date,
y = mean_30),
color = "black") +
scale_x_date(date_breaks = "6 months",
date_minor_breaks = "1 months",
date_labels = "%Y-%m") +
facet_wrap(~action,
scales = "free_y") +
theme_bw() +
labs(title = paste0("US Amateur License Actions since ", Sys.Date() - years(2)),
subtitle = "with 30-day rolling mean",
y = "Count",
x = "Date",
caption = "w1cdn.net; source: ae7q.com",
color = "Action") +
guides(color = "none")
ggsave("plots/actions-over-time-2-years.png", width = 9, height = 6)

70
bin/scrape-ae7q-mass.R Normal file
View File

@@ -0,0 +1,70 @@
# Counts of license actions by date
# Use this file to scrape a series of dates from AE7Q
# Packages used below: rvest for scraping, dplyr/tidyr for cleanup
library(rvest)
library(dplyr)
library(tidyr)
# Set start and end date
date_vec <- seq(as.Date("2024-09-22"), as.Date("2024-11-26"), by="days")
# Randomize dates we are querying
date_vec <- sample(date_vec)
dvbackup <- date_vec
#date_vec <- date_vec[1687:7176]
ae7q_list <- list()
for(i in 1:length(date_vec)){
ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=", date_vec[i])
print(ae7q_new_url)
# Read the page
ae7q_new_raw <- read_html(ae7q_new_url)
# Make sure the new license table exists first
if(!grepl("No license grants found issued on", ae7q_new_raw %>% html_text())){
# Get tables and clean up
ae7q_new_tables <- ae7q_new_raw %>%
html_elements(xpath = "//table") %>%
html_table()
# Find the right table by the column names
right_table_id <- grep(paste(c("Callsign",
"Region/ State",
"Entity Name",
"Applicant Type",
"Licensee Class",
"License Status",
"Action Type"), collapse = " "),
lapply(ae7q_new_tables, function(x) paste(names(x), collapse = " ")))
ae7q_table_new <- ae7q_new_tables[[right_table_id]]
ae7q_list[[i]] <- ae7q_table_new %>%
#mutate(across(everything(), ~na_if(., "\""))) %>%
mutate(across(everything(),
~replace(., . == "\"", NA))) %>%
fill(everything()) %>%
group_by(`Action Type`) %>%
summarize(count = n(), .groups = "keep") %>%
mutate(date = date_vec[i],
source = "AE7Q", source_detail = ae7q_new_url) %>%
relocate(date)
} else {
ae7q_list[[i]]<- data.frame("date" = date_vec[i],
"Action Type" = NA,
"count" = NA,
"source" = "AE7Q",
"source_detail" = ae7q_new_url)
}
# Wait a random 1-5 seconds between requests
Sys.sleep(sample(1:5, 1))
}
# Combine all the data and sort by date
a <- bind_rows(ae7q_list) %>%
arrange(date) %>%
filter(!is.na(date))
write.csv(a, "out/ae7q-actions-scraped03.csv", row.names = F)

View File

@@ -6,6 +6,7 @@ library(tidyr)
# sudo crontab -e
# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-license-counts.R">/dev/null 2>&1
###### ARRL ######
arrl_url <- "https://www.arrl.org/fcc-license-counts"
# Read the page
@@ -127,7 +128,7 @@ hamcall_table_state <- left_join(hamcall_table_state, state_codes, by = join_by(
relocate(source_name:source_detail, .after = m)
###### AE7Q ######
###### AE7Q States ######
ae7q_url <- "https://www.ae7q.com/query/stat/LicenseUSA.php"
# Read the page
@@ -137,19 +138,76 @@ ae7q_raw <- read_html(ae7q_url)
ae7q_tables <- ae7q_raw %>%
html_elements(xpath = "//table") %>%
html_table()
ae7q_table_state <- ae7q_tables[[20]]
ae7q_table_state_raw <- ae7q_tables[[20]]
# Fix names
names(ae7q_table_state) <- ae7q_table_state[1,]
ae7q_table_state <- ae7q_table_state[-1,]
names(ae7q_table_state_raw) <- ae7q_table_state_raw[1,]
ae7q_table_state_raw <- ae7q_table_state_raw[-1,]
# TODO
# split percents out into other columns (separate_wider_delim() ?)
# etc.
ae7q_table_state <- ae7q_table_state_raw %>%
pivot_longer(cols = -"State or Territory") %>%
# remove percentages
mutate(value = gsub("\\s*\\([^\\)]+\\)", "", value)) %>%
pivot_wider(id_cols = "State or Territory") %>%
# Split states
separate(`State or Territory`,
into = c("state_code", "state_name"),
sep = " - ",
fill = "right") %>%
mutate(state_name = case_when(state_code == "-" ~ "Other*",
state_code == "Totals" ~ "TOTAL",
TRUE ~ state_name)) %>%
# Organize
select(c(-GeoRegion, -state_code)) %>%
mutate(date = Sys.Date(),
ttp=NA, conditional=NA, military=NA, multiple=NA, repeater=NA,
gmrs=NA, source="AE7Q", source_detail=ae7q_url) %>%
relocate(date, state_name, Novice, Technician, TechnicianPlus,
General, Advanced, AmateurExtra, Total, ttp, conditional,
Club)
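# The gsub() above strips the parenthetical share from each cell; a quick sketch
# on a made-up value (the exact AE7Q cell format is an assumption):
gsub("\\s*\\([^\\)]+\\)", "", "12,345 (5.67%)")  # "12,345"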
###### AE7Q License Actions ######
ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=", Sys.Date()-1)
#ae7q_new_url <- paste0("https://www.ae7q.com/query/list/ProcessDate.php?DATE=2024-11-01")
# Read the page
ae7q_new_raw <- read_html(ae7q_new_url)
# Make sure the new license table exists first
if(!grepl("No license grants found issued on", ae7q_new_raw %>% html_text())){
# Get tables and clean up
ae7q_new_tables <- ae7q_new_raw %>%
html_elements(xpath = "//table") %>%
html_table()
# Find the right table by the column names
right_table_id <- grep(paste(c("Callsign",
"Region/ State",
"Entity Name",
"Applicant Type",
"Licensee Class",
"License Status",
"Action Type"), collapse = " "),
lapply(ae7q_new_tables, function(x) paste(names(x), collapse = " ")))
ae7q_table_new <- ae7q_new_tables[[right_table_id]]
ae7q_sum01 <- ae7q_table_new %>%
#mutate(across(everything(), ~na_if(., "\""))) %>%
mutate(across(everything(),
~replace(., . == "\"", NA))) %>%
fill(everything()) %>%
group_by(`Action Type`) %>%
summarize(count = n(), .groups = "keep") %>%
mutate(date = Sys.Date()-1,
source = "AE7Q", source_detail = ae7q_new_url) %>%
relocate(date)
} else {
ae7q_sum01<- data.frame("date" = Sys.Date(),
"Action Type" = NA,
"count" = NA,
"source" = "AE7Q",
"source_detail" = ae7q_new_url)
}
##### Append tables #####
@@ -167,3 +225,13 @@ write.table(hamcall_table_state, file = "out/hamcall-states-scraped.csv", sep =
append = TRUE, quote = FALSE,
col.names = F, row.names = FALSE,
na = "")
write.table(ae7q_table_state, file = "out/ae7q-states-scraped.csv", sep = ",",
append = TRUE, quote = FALSE,
col.names = F, row.names = FALSE,
na = "")
write.table(ae7q_sum01, file = "out/ae7q-actions-scraped.csv", sep = ",",
append = TRUE, quote = FALSE,
col.names = F, row.names = FALSE,
na = "")

16008
data/ae7q-actions-scraped.csv Normal file

File diff suppressed because it is too large.

Three other changed files: diffs suppressed because they are too large.

plots/actions-over-time.png Normal file (new image, 242 KiB)

One other new image (572 KiB) and ten updated images; binary files not shown.