# File metadata: 43 lines, 1.2 KiB, R
library(rvest)
library(dplyr)
library(tidyr)

# Scrape the table and the date stamp from the ARRL page at `arrl_url`,
# append the dated table to a cumulative CSV file, then rewrite that file
# with duplicate rows removed.

arrl_url <- "https://www.arrl.org/fcc-license-counts"
out_file <- "out/arrl-fcc-licenses-scraped.csv"

# Read the page
d_raw <- read_html(arrl_url)

# Get date ----
# The text of the <em> node is reduced to whatever follows its last ", " and
# parsed as day-abbreviated month-year (e.g. "05-Jan-2024").
# NOTE(review): "%b" is matched against month abbreviations of the current
# locale, so this presumably needs an English locale -- confirm.
date_raw <- d_raw %>%
  html_nodes(
    xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/p[2]/em"
  ) %>%
  html_text() %>%
  gsub(".*, ", "", .) %>%
  as.Date(format = "%d-%b-%Y")

# Fail loudly instead of carrying an empty or NA date forward: rows with an
# NA date would be discarded by the `Date != "Date"` filter further down, so
# the run would otherwise appear to succeed while storing nothing.
if (length(date_raw) != 1L || is.na(date_raw)) {
  stop(
    "Could not extract a single valid date from ", arrl_url,
    "; the page layout or date format may have changed.",
    call. = FALSE
  )
}

# Get table and clean up ----
tab_nodes <- d_raw %>%
  html_nodes(
    xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/table"
  )

if (length(tab_nodes) == 0L) {
  stop(
    "Could not find the table on ", arrl_url,
    "; the page layout may have changed.",
    call. = FALSE
  )
}

tab <- tab_nodes %>%
  html_table() %>%
  .[[1]] %>%
  # Add date col
  bind_cols(Date = date_raw, .) %>%
  # Insert Tech Plus for compatibility
  mutate("Tech-Plus" = NA, .before = General)

# Append table ----
# Make sure the output directory exists before writing into it.
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)

# Write the header only when starting a new (or empty) file. Re-appending it
# on every run makes write.table() warn and leaves header rows inside the
# data, which turns every column into character when the file is read back.
needs_header <- !file.exists(out_file) || file.size(out_file) == 0
write.table(
  tab,
  file = out_file,
  sep = ",",
  append = !needs_header,
  quote = FALSE,
  col.names = needs_header,
  row.names = FALSE
)

# Clean up table to remove any duplicates ----
# (sometimes the page isn't updated regularly)
db <- read.csv(out_file)
db2 <- db %>%
  distinct(.keep_all = TRUE) %>%
  # Drops header rows embedded by earlier runs that appended the column
  # names together with the data.
  filter(Date != "Date")

write.csv(
  db2,
  out_file,
  quote = FALSE,
  row.names = FALSE
)