Add a script to scrape the ARRL FCC page.
This commit is contained in:
		
							
								
								
									
										42
									
								
								bin/scrape-arrl-fcc.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								bin/scrape-arrl-fcc.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | |||||||
|  |  | ||||||
library(rvest)
library(dplyr)
library(tidyr)

# Scrape the ARRL FCC license-count page and append the current table to a
# running CSV file, keeping a single copy of each distinct row.
#
# Side effects: one HTTP request to `arrl_url`; creates or rewrites `out_path`.

arrl_url <- "https://www.arrl.org/fcc-license-counts"
out_path <- "out/arrl-fcc-licenses-scraped.csv"

# Absolute XPaths into the page layout. They break whenever the page structure
# changes, which is why the results are validated below before being written.
date_xpath <- "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/p[2]/em"
table_xpath <- "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/table"

# Read the page
d_raw <- read_html(arrl_url)

# Get date: drop everything up to the last ", " and parse the remainder.
# NOTE(review): "%b" is matched against the current locale's month
# abbreviations, so this presumably needs an English LC_TIME -- confirm.
date_raw <- d_raw %>%
  html_nodes(xpath = date_xpath) %>%
  html_text() %>%
  gsub(".*, ", "", .) %>%
  as.Date(format = "%d-%b-%Y")

# Fail loudly rather than writing rows with a missing date.
if (length(date_raw) != 1L || is.na(date_raw)) {
  stop("Could not find or parse the report date on ", arrl_url, call. = FALSE)
}

# Get table(s) matched by the XPath
tables <- d_raw %>%
  html_nodes(xpath = table_xpath) %>%
  html_table()

if (length(tables) == 0L) {
  stop("Could not find the license count table on ", arrl_url, call. = FALSE)
}

tab <- tables[[1]] %>%
  # Add date col
  bind_cols(Date = date_raw, .) %>%
  # Insert Tech Plus for compatibility
  mutate("Tech-Plus" = NA,
         .before = General)

# Append table. The header is written only when the file is new or empty;
# appending it on every run would put a stray header row into the data and
# force every column to be read back as character.
# Rows are appended positionally, so the scraped column order is assumed to
# match the existing file.
# NOTE(review): quote = FALSE produces a malformed CSV if any scraped cell
# contains a comma -- confirm the table values never do.
dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)
has_history <- file.exists(out_path) && file.size(out_path) > 0
write.table(tab, file = out_path, sep = ",",
            append = has_history, quote = FALSE,
            col.names = !has_history, row.names = FALSE)

# Clean up table to remove any duplicates (sometimes the page isn't updated
# regularly). The filter also drops header rows left in the file by earlier
# versions of this script that appended the header on every run.
# read.csv() sanitises column names by default, so "Tech-Plus" is stored as
# "Tech.Plus" in the rewritten file.
db <- read.csv(out_path)
db2 <- db %>%
  distinct(.keep_all = TRUE) %>%
  filter(Date != "Date")
write.csv(db2, out_path,
          quote = FALSE,
          row.names = FALSE)
|  |  | ||||||
		Reference in New Issue
	
	Block a user