Add a script to scrape the ARRL FCC page.
This commit is contained in:
parent
ddcc8aa5c8
commit
3b9d756f6f
42
bin/scrape-arrl-fcc.R
Normal file
42
bin/scrape-arrl-fcc.R
Normal file
@ -0,0 +1,42 @@
|
||||
|
||||
library(rvest)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
arrl_url <- "https://www.arrl.org/fcc-license-counts"

# Read the page
d_raw <- read_html(arrl_url)

# Get date: take the text of the <em> node at the absolute XPath below, strip
# everything up to and including the last ", " (the regex is greedy), and
# parse what remains as day-abbreviated month-year.
# NOTE(review): "%b" is matched against the current locale's month
# abbreviations, so this presumably needs an English/C locale -- confirm.
date_raw <- d_raw %>%
  html_nodes(xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/p[2]/em") %>%
  html_text() %>%
  gsub(".*, ", "", .) %>%
  as.Date(format = "%d-%b-%Y")

# Stop here if the node was not found or its text did not parse, rather than
# carrying a missing date forward into the scraped rows.
if (length(date_raw) == 0L || anyNA(date_raw)) {
  stop("Could not parse the report date from ", arrl_url, call. = FALSE)
}
|
||||
|
||||
# Get table and clean up
# html_table() returns a list with one data frame per matched <table> node.
arrl_tables <- d_raw %>%
  html_nodes(xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/table") %>%
  html_table()

# An empty list means the XPath no longer matches the page; say so explicitly
# instead of failing on `[[1]]` with "subscript out of bounds".
if (length(arrl_tables) == 0L) {
  stop("No table found at the expected location on the page", call. = FALSE)
}

tab <- arrl_tables[[1]] %>%
  # Add date col as the first column
  bind_cols(Date = date_raw, .) %>%
  # Insert an all-NA Tech-Plus column before General for compatibility
  # (presumably with previously collected data that has this column -- confirm)
  mutate("Tech-Plus" = NA,
         .before = General)
|
||||
|
||||
# Append table
# The header is written only when the file is new or empty. Appending it on
# every run makes write.table() warn ("appending column names to file") and
# scatters header rows through the data.
out_file <- "out/arrl-fcc-licenses-scraped.csv"
needs_header <- !file.exists(out_file) || file.size(out_file) == 0
write.table(tab, file = out_file, sep = ",",
            append = TRUE, quote = FALSE,
            col.names = needs_header, row.names = FALSE)

# Clean up table to remove any duplicates (sometimes the page isn't updated regularly)
# Every column is read as text so values are written back exactly as stored
# and duplicates are compared on their textual form.
db <- read.csv(out_file, colClasses = "character")
db2 <- db %>%
  distinct() %>%
  # Drops header rows left in the file by earlier runs that appended the
  # header each time; rows whose Date is NA are dropped by this filter too.
  filter(Date != "Date")
write.csv(db2, out_file,
          quote = FALSE,
          row.names = FALSE)
|
||||
|
Loading…
Reference in New Issue
Block a user