library(rvest)
library(dplyr)
library(tidyr)

# sudo crontab -e
# 5 9 * * * su matt -c "cd /home/matt/ham-radio-licenses/; Rscript /home/matt/ham-radio-licenses/scrape-license-counts.R">/dev/null 2>&1

arrl_url <- "https://www.arrl.org/fcc-license-counts"

# Download and parse the ARRL license-count page
d_raw <- read_html(arrl_url)

# Report date: read from the <em> element at a fixed position in the page.
# Everything up to and including the last ", " is dropped and the remainder is
# parsed as day-abbreviated month-year (%b is matched against the session's
# LC_TIME month abbreviations).
date_raw <- d_raw %>%
  html_elements(xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/p[2]/em") %>%
  html_text() %>%
  gsub(".*, ", "", .) %>%
  as.Date(format = "%d-%b-%Y")

# as.Date(format = ...) returns NA rather than failing when the text does not
# match, and a layout change can make the XPath match nothing. Say so instead
# of letting a missing date pass unnoticed.
if (length(date_raw) != 1L || is.na(date_raw)) {
  warning("Could not read a single report date from ", arrl_url,
          call. = FALSE)
}

# Build the ARRL output row(s): the first table found at the fixed XPath, with
# the report date in front and placeholder columns added.
tab <- d_raw %>%
  html_elements(xpath = "/html/body/div[1]/div/div[2]/div/div[2]/div[1]/table") %>%
  html_table() %>%
  .[[1]] %>%
  # Report date becomes the first column
  bind_cols(Date = date_raw, .) %>%
  # Empty Tech-Plus column directly before General, for compatibility
  mutate("Tech-Plus" = NA, .before = General) %>%
  # Empty filler columns a-g, then a record of where the row came from.
  # NOTE(review): source_detail carries the http:// form of the address while
  # arrl_url is https://; left unchanged here -- confirm which one the CSV
  # should hold before switching.
  mutate(
    a = NA, b = NA, c = NA, d = NA, e = NA, f = NA, g = NA,
    source_name = "ARRL FCC License Counts",
    source_detail = "http://www.arrl.org/fcc-license-counts"
  )

# Append the ARRL row(s) to the running CSV (no header, NA written as an empty
# field). A row without a date cannot be placed in a dated series, so when the
# date failed to parse the append is skipped with a warning and the rest of the
# script carries on.
if (anyNA(tab[["Date"]])) {
  warning("ARRL report date is missing; nothing appended to ",
          "out/arrl-fcc-licenses-scraped.csv", call. = FALSE)
} else {
  write.table(tab, file = "out/arrl-fcc-licenses-scraped.csv", sep = ",",
              append = TRUE, quote = FALSE,
              col.names = FALSE, row.names = FALSE,
              na = "")
}

# Clean up table to remove any duplicates (sometimes the page isn't updated regularly)
# db <- read.csv("out/arrl-fcc-licenses-scraped.csv")
# db2 <- db %>% distinct(.keep_all = TRUE) %>% 
#         filter(Date != "Date")
# write.csv(db2, "out/arrl-fcc-licenses-scraped.csv",
#           quote = F,
#           row.names = F,
#           na = "")

###### HamCall ######
hamcall_url <- "https://hamcall.net/hamcallcounts.html"

# Download and parse the HamCall counts page
hamcall_raw <- read_html(hamcall_url)

# The "current as of" date is not selected by XPath; it is cut out of the
# page's full text instead: drop everything through the lead-in phrase, then
# everything from the line breaks before the next heading onward.
hamcall_date <- local({
  page_text <- as.character(html_text(hamcall_raw))
  from_date <- gsub(".*All counts current as of ", "", page_text)
  date_text <- gsub("\n\r\n\r\nAll Current US Hams.*", "", from_date)
  as.Date(date_text)
})

# Parse every <table> on the page into a list of data frames
hamcall_tables <- html_table(html_elements(hamcall_raw, xpath = "//table"))

# The tables used further down are picked out by their position on the page
hamcall_table_all_hams_raw <- hamcall_tables[[2L]]
hamcall_table_class_raw <- hamcall_tables[[3L]]
hamcall_table_city_raw <- hamcall_tables[[4L]]
hamcall_table_state_raw <- hamcall_tables[[5L]]

# One wide row of per-class counts plus totals, laid out for the licenses CSV
hamcall_table_class_pivot <- local({
  # Spread the Class/Count pairs into one column per class
  wide <- pivot_wider(hamcall_table_class_raw,
                      names_from = "Class",
                      values_from = "Count")

  # Stamp the row with the date and a TOTAL label, add an empty techplus
  # column, and take the total from row 1, column 2 of the all-hams table
  wide <- mutate(wide,
                 date = hamcall_date,
                 state = "TOTAL",
                 techplus = NA,
                 total = pull(hamcall_table_all_hams_raw[1, 2]))

  # Fix the column order and keep only those first nine columns
  wide <- relocate(wide, date, state, N, T, techplus, G, A, E, total)
  wide <- select(wide, 1:9)

  # Empty filler columns around the club and military counts (rows 2 and 3,
  # column 2 of the all-hams table), then the source of the data
  mutate(wide,
         a = NA, b = NA,
         club = pull(hamcall_table_all_hams_raw[2, 2]),
         military = pull(hamcall_table_all_hams_raw[3, 2]),
         c = NA, d = NA, e = NA,
         source_name = "HamCall",
         source_detail = hamcall_url)
})

# City counts: stamp each row with the date and source, date column first
hamcall_table_city <- mutate(hamcall_table_city_raw,
                             date = hamcall_date,
                             source_name = "HamCall",
                             source_detail = hamcall_url)
hamcall_table_city <- relocate(hamcall_table_city, date)

# State counts: same stamping, then move the listed columns to the front in
# this order
hamcall_table_state <- mutate(hamcall_table_state_raw,
                              date = hamcall_date,
                              source_name = "HamCall",
                              source_detail = hamcall_url)
hamcall_table_state <- relocate(hamcall_table_state,
                                date, State, Count, source_name, source_detail)


###### AE7Q ######
ae7q_url <- "https://www.ae7q.com/query/stat/LicenseUSA.php"

# As far as this script shows, ae7q_table_state is not written out anywhere
# yet. A failure in this section (site unreachable, fewer than 20 tables on the
# page) must therefore not stop the script before the remaining output is
# appended: on error a warning is raised and ae7q_table_state is set to NULL.
ae7q_table_state <- tryCatch(
  {
    # Read the page
    ae7q_raw <- read_html(ae7q_url)

    # Parse every <table>; table 20 is the one used as the state table
    ae7q_tables <- ae7q_raw %>%
      html_elements(xpath = "//table") %>%
      html_table()
    ae7q_state_raw <- ae7q_tables[[20]]

    # The first row holds the column headers: promote it to the names (as a
    # plain character vector) and drop it from the body.
    ae7q_header <- as.character(unlist(ae7q_state_raw[1, ], use.names = FALSE))
    setNames(ae7q_state_raw[-1, ], ae7q_header)
  },
  error = function(e) {
    warning("AE7Q scrape failed, continuing without it: ",
            conditionMessage(e), call. = FALSE)
    NULL
  }
)

# TODO
# split percents out into other columns (separate_wider_delim() ?)
# etc.


##### Append tables #####
# Each HamCall table is appended to its own running CSV: no header row, no row
# names, NA written as an empty field.
# NOTE(review): quote = FALSE leaves every field unquoted, so a comma inside a
# value (a city name, say) would shift that row's columns -- confirm the
# scraped text never contains one.
hamcall_outputs <- list(
  "out/hamcall-licenses-scraped.csv" = hamcall_table_class_pivot,
  "out/hamcall-cities-scraped.csv" = hamcall_table_city,
  "out/hamcall-states-scraped.csv" = hamcall_table_state
)

for (out_path in names(hamcall_outputs)) {
  write.table(hamcall_outputs[[out_path]], file = out_path, sep = ",",
              append = TRUE, quote = FALSE,
              col.names = FALSE, row.names = FALSE,
              na = "")
}