## ----setup, include = FALSE---------------------------------------------------
options(rmarkdown.html_vignette.check_title = FALSE)

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----packages, eval = FALSE---------------------------------------------------
# library(datamuseum)
# 
# library(tidyr)
# library(dplyr)
# library(lubridate)
# library(stringr)
# library(googlesheets4)
# 
# library(ggplot2)
# library(maps)
# library(rnaturalearth)
# library(rnaturalearthdata)
# library(sf)
# 

## ----GBIF 3/30/2026, eval = FALSE---------------------------------------------
# 
# #Raw Data
# 
# GBIF_raw <- read.csv("GBIF_Octopodoidea_raw.csv") #88256 Observations
# 
# #Trimmed Data
# 
# GBIF_clean <- GBIF_raw[ -c(1:7, 11:15, 24:32, 34:36, 38, 40:50)]
# 
# GBIF_clean <- GBIF_clean[ -c(7, 9)] #88256 Observations
# 
# #Also available on Github as GBIF_Octopodoidea_trim.csv
# 
# #Japan Octopus Data
# 
# GBIF_Japan <- latlong_range(GBIF_clean, "decimalLatitude", "decimalLongitude",
#                              25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename(
#               "Prefecture" = "stateProvince",
#               "Precise Location" = "locality", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude",
#               "Year" = "year", "Genus" = "genus", "Country" = "countryCode",
#               "SciName" = "species", "Family" = "family","Source" = "institutionCode") #2145 Observations
# 
# GBIF_Japan <- replace(GBIF_Japan, GBIF_Japan=='', NA)
# 
# GBIF_Japan <- GBIF_Japan %>%
#   filter(!is.na(Source) & !is.na(Family) &  !is.na(Genus) & !is.na(SciName)
#          & !is.na(Year)) #798 Observations
# 

## ----InvBase 3/30/2026, eval = FALSE------------------------------------------
# 
# #Raw Data
# 
# InvBase_raw <- read.csv("InvBase_Octopodoidea_raw.csv") #22608 Observations
# 
# #Trimmed Data
# 
# InvBase_clean <- InvBase_raw[ -c(1, 3:6, 8:13, 15:17, 19, 21:38, 40:62, 64:70, 72, 75:79, 82:103)] #22608 Observations
# 
# #Also available on Github as InvBase_Octopodoidea_trim.csv
# 
# #Japan Octopus Data
# 
# InvBase_Japan <- latlong_range(InvBase_clean, "decimalLatitude", "decimalLongitude",
#                              25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename(
#               "Prefecture" = "stateProvince", "Country" = "country",
#               "Precise Location" = "county", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude",
#               "Year" = "year", "Genus" = "genus", "Source" = "institutionCode", "Family" = "family"
#               ) #50 Observations
# 
# InvBase_Japan <- replace(InvBase_Japan, InvBase_Japan=='', NA)
# 
# InvBase_Japan <- InvBase_Japan %>%
#   filter(!is.na(Source) & !is.na(Family) &  !is.na(Genus) & !is.na(specificEpithet)
#          & !is.na(Year)) #43 Observations
# 
# taxon_column(InvBase_Japan, output = "list")
# 
# taxon_rank(InvBase_Japan, c(Family, Genus, specificEpithet))
# 
# InvBase_Japan <- taxon_combine(InvBase_Japan, genus = Genus, epithet = specificEpithet,
#                             new_column = "SciName")
# 
# InvBase_Japan <- InvBase_Japan[ -c(5)]
# 

## ----BISMAL 3/30/2026, eval = FALSE-------------------------------------------
# 
# #Raw Data
# 
# BISMAL_raw <- read.csv("BISMAL_Octopodoidea_raw.csv") #1547 Observations
# 
# #Trimmed Data
# 
# BISMAL_clean <- BISMAL_raw[ -c(1:11, 13:18, 20:21, 23:49, 51:67, 67, 70,
#                                        72:79, 82:99, 100:104, 106:108, 110, 112:116)] #1547 Observations
# 
# #Also available on Github as BISMAL_Octopodoidea_trim.csv
# 
# #Japan Octopus Data
# 
# BISMAL_Japan <- latlong_range(BISMAL_clean, "decimalLatitude", "decimalLongitude",
#                              25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename( "Prefecture" = "stateProvince",
#               "Precise Location" = "locality", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude",
#               "Year" = "year", "Genus" = "genus", "Country" = "country", "Source" = "institutionCode",
#               "Family" = "family") #1507 Observations
# 
# BISMAL_Japan <- replace(BISMAL_Japan, BISMAL_Japan=='', NA)
# 
# BISMAL_Japan <- BISMAL_Japan %>%
#   filter(!is.na(Source) & !is.na(Family) &  !is.na(Genus) & !is.na(specificEpithet)
#          & !is.na(Year)) #473 Observations
# 
# taxon_column(BISMAL_Japan, output = "list")
# 
# taxon_rank(BISMAL_Japan, c(Family, Genus, specificEpithet))
# 
# BISMAL_Japan <- taxon_combine(BISMAL_Japan, genus = Genus, epithet = specificEpithet,
#                             new_column = "SciName")
# 
# BISMAL_Japan <- BISMAL_Japan[ -c(12)]
# 

## ----OBIS 3/30/2026, eval = FALSE---------------------------------------------
# 
# #Raw Data
# 
# OBIS_raw <- read.csv("OBIS_Octopodoidea_raw.csv") #58526 Observations
# 
# #Trimmed Data
# 
# OBIS_clean <- OBIS_raw[ -c(0:20, 22:28, 30:39, 41, 44:63, 65:74, 76:100, 102:112, 114:127, 129:203, 205:209,
#                                    211:282)] #58526 Observations
# 
# #Also available on Github as OBIS_Octopodoidea_trim.csv
# 
# #Japan Octopus Data
# 
# OBIS_Japan <- latlong_range(OBIS_clean, "decimalLatitude", "decimalLongitude",
#                              25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename(
#               "Prefecture" = "stateProvince", "Country" = "country",
#               "Precise Location" = "locality", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude",
#               "Year" = "date_year", "Source" = "institutionCode", "Family" = "family", "Genus" = "genus",
#               "SciName" = "species") #859 Observations
# 
# OBIS_Japan <- replace(OBIS_Japan, OBIS_Japan=='', NA)
# 
# OBIS_Japan <- OBIS_Japan %>%
#   filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) & !is.na(SciName)
#          & !is.na(Year)) #698 Observations
# 

## ----NSMT 1/19/2024 + 3/11/2024, eval = FALSE---------------------------------
# 
# #Raw Data
# 
# NSMT_raw <- read.csv("NSMT_Octopodoidea_raw.csv") #870 Observations
# 
# #Trimmed Data
# 
# NSMT_clean <- NSMT_raw[ -c(5, 9:12, 16:18, 22:23)] #870 Observations
# 
# #Also available on Github as NSMT_Octopodoidea_trim.csv
# 
# #Japan Octopus Data
# 
# NSMT_Japan <- latlong_range(NSMT_clean, "Latitude", "Longitude",
#                             25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename(
#                               "Prefecture" = "Region", "Precise Location" = "Previse.loc.",
#                               "Source" = "Group.Abb.") #727 Observations
# 
# NSMT_Japan <- replace(NSMT_Japan, NSMT_Japan=='', NA)
# 
# NSMT_Japan <- NSMT_Japan %>% filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) &	
#                                       !is.na(Species)& !is.na(Year)) #726 Observations
# 
# taxon_column(NSMT_Japan, output = "list")
# 
# taxon_rank(NSMT_Japan, c(Family, Genus, Species))
# 
# NSMT_Japan <- taxon_combine(NSMT_Japan, genus = Genus, epithet = Species,
#                             new_column = "SciName")
# 
# NSMT_Japan <- NSMT_Japan[ -c(6, 7)]
# 

## ----museum, eval = FALSE-----------------------------------------------------
# 
# museum <- rbind(
#   InvBase_Japan %>% mutate(`Data Frame` = "InvBase"),
#   GBIF_Japan    %>% mutate(`Data Frame` = "GBIF"),
#   NSMT_Japan   %>% mutate(`Data Frame` = "NSMT"),
#   OBIS_Japan    %>% mutate(`Data Frame` = "OBIS"),
#   BISMAL_Japan  %>% mutate(`Data Frame` = "BISMAL")
# ) #2738 Observations
# 
# museum <- deduplicate(museum, "catalogNumber",
#                       drop_na = TRUE) #143 duplicate rows removed; 608 rows removed due to missing ID. 1987 Observations
# 
# museum_dupes <- attr(museum, "duplicates") #1268 Observations, 143 FALSE
# 
# museum <- duplicate(museum, "individualCount") #Duplication increased count from 1894 Observations to 2671 Observations

## ----museum_taxon, eval = FALSE-----------------------------------------------
# 
# #Taxonomized Japan Octopus Data
# 
# museum_clean <- taxon_cleaner(museum, SciName, in_place = TRUE, drop_na = TRUE) #2260 Observations
# 
# museum_clean <- museum_clean %>% mutate(SciName = case_when(
#     SciName == "Octopus vulgaris"        ~ "Octopus sinensis",
#     TRUE                                 ~ SciName))
# 
# museum_valid <- taxon_validate(museum_clean, SciName, update_related = TRUE)
# 
# valid_report <- attr(museum_valid, "validation_report")
# 
# museum_check <- taxon_spellcheck(museum_valid, c(SciName), update = TRUE,
#                                      validation_report = valid_report)
# 
# check_report <- attr(museum_check, "spellcheck_report")
# 
# museum_check <- museum_check %>%
#   mutate(SciName = case_when(
#     SciName == "Pinnoctopus macropus" ~ "Callistoctopus macropus",
#     TRUE                              ~ SciName
#   ))
# 
# museum_taxon <- taxon_add(museum_check, SciName, rank = c("order", "phylum", "family", "genus"),
#                               author_year = FALSE, sort = FALSE)
# 
# add_report <- attr(museum_taxon, "add_report")
# 
# museum_taxon <-  museum_taxon[ -c(3,4)]
# 
# museum_taxon <- museum_taxon %>%
#   dplyr::rename(
#     "Order" = "order", "Phylum" = "phylum",
#     "Family" = "family", "Genus" = "genus")
# 
# museum_taxon <- taxon_sort(museum_taxon)
# 
# museum_taxon <- taxon_cite(museum_taxon, c(Family, Genus, SciName))
# 
# cite_report <- attr(museum_taxon, "cite_report")
# 
# museum_taxon <- museum_taxon %>%
#   filter(SciName != "Muusoctopus small in mature") %>%
#   mutate(Family_cite = case_when(
#     Family_cite == "Bathypolypodidae" ~ "Bathypolypodidae (G. C. Robson, 1929)",
#     Family_cite == "Enteroctopodidae" ~ "Enteroctopodidae (Strugnell, M. Norman, Vecchione, Guzik & Allcock, 2014)",
#     Family_cite == "Megaleledonidae" ~ "Megaleledonidae (Iw. Taki, 1961)",
#     TRUE                              ~ Family_cite
#   ))
# 
# museum_taxon <- italicize(museum_taxon, c(Genus_cite, SciName_cite))

## ----graph, eval = FALSE, fig.width = 16, fig.height = 10---------------------
# 
# world_map <- map_data("world")
# 
# japan <- map_data("world", region="japan")
# 
# family_labels <- c(
#   "Octopodidae (D'Orbigny, 1839-1842 in Férussac and D'Orbigny, 1834-1848)"   = "Octopodidae\n(D'Orbigny, 1839-1842\nin Férussac and\nD'Orbigny, 1834-1848)",
#   "Amphitretidae (Hoyle, 1886)"                                                = "Amphitretidae\n(Hoyle, 1886)",
#   "Enteroctopodidae (Strugnell, M. Norman, Vecchione, Guzik & Allcock, 2014)" = "Enteroctopodidae\n(Strugnell, M. Norman,\nVecchione, Guzik\n& Allcock, 2014)",
#   "Bathypolypodidae (G. C. Robson, 1929)"                                             = "Bathypolypodidae\n(G. C. Robson, 1929)",
#   "Megaleledonidae (Iw. Taki, 1961)"                                                  = "Megaleledonidae\n(Iw. Taki, 1961)"
# )
# 
# lon_min <- 125
# lon_max <- 150
# lat_min <- 25
# lat_max <- 50
# 
# museum_taxon$Longitude <- as.numeric(museum_taxon$Longitude)
# museum_taxon$Latitude  <- as.numeric(museum_taxon$Latitude)
# 
# genera_per_family <- museum_taxon %>%
#   select(Genus_cite_italic, Family) %>%
#   distinct() %>%
#   count(Family)
# 
# family_palettes <- list(
#   "Amphitretidae"    = sequential_hcl(n = 2, palette = "BrwnYl",   l = c(30, 55)),
#   "Bathypolypodidae" = sequential_hcl(n = 1, palette = "Reds",     l = c(45, 45)),
#   "Enteroctopodidae" = sequential_hcl(n = 3, palette = "Peach"),
#   "Megaleledonidae"  = sequential_hcl(n = 1, palette = "YlOrRd",   l = c(55, 55)),
#   "Octopodidae"      = sequential_hcl(n = 7, palette = "Purples",  l = c(25, 65))
# )
# 
# genus_family_map <- museum_taxon %>%
#   select(Genus_cite_italic, Family) %>%
#   distinct() %>%
#   arrange(Family, Genus_cite_italic)  # arrange so shades are assigned alphabetically
# 
# genus_colors <- unlist(lapply(names(family_palettes), function(fam) {
#   genera <- genus_family_map$Genus_cite_italic[genus_family_map$Family == fam]
#   colors <- family_palettes[[fam]]
#   setNames(colors, genera)
# }))
# 
# # Create ordered breaks grouped by family
# genus_order <- museum_taxon %>%
#   select(Genus_cite_italic, Family, Family_cite) %>%
#   distinct() %>%
#   arrange(factor(Family, levels = c("Amphitretidae", "Bathypolypodidae",
#                                      "Enteroctopodidae", "Megaleledonidae",
#                                      "Octopodidae")),
#           Genus_cite_italic)
# 
# # Step 1 - build genus_order_with_headers
# genus_order_with_headers <- genus_order %>%
#   group_by(Family) %>%
#   group_modify(~ {
#     family_cite_label <- paste0("bold(", .y$Family, ")")
#     bind_rows(
#       data.frame(
#         Genus_cite_italic = family_cite_label,
#         Family_cite       = .x$Family_cite[1]
#       ),
#       .x %>% select(Genus_cite_italic, Family_cite)
#     )
#   }) %>%
#   ungroup() %>%
#   pull(Genus_cite_italic)
# 
# # Step 2 - add spacer before Megaleledonidae
# meg_pos <- which(genus_order_with_headers == "bold(Megaleledonidae)")
# genus_order_with_headers <- c(
#   genus_order_with_headers[1:(meg_pos - 1)],
#   "' '",
#   genus_order_with_headers[meg_pos:length(genus_order_with_headers)]
# )
# 
# # Step 3 - build colors
# header_colors <- setNames(
#   rep("#FFFFFF00", 5),
#   grep("^bold", genus_order_with_headers, value = TRUE)
# )
# spacer_color      <- setNames("#FFFFFF00", "' '")
# genus_colors_final <- c(genus_colors, header_colors, spacer_color)
# 
# # Step 4 - legend overrides
# legend_breaks <- intersect(genus_order_with_headers, names(genus_colors_final))
# n <- length(legend_breaks)
# spacer_pos <- which(legend_breaks == "' '")
# header_pos <- which(grepl("^bold", legend_breaks))
# hide_pos   <- c(spacer_pos, header_pos)
# 
# legend_size  <- ifelse(seq_along(legend_breaks) %in% hide_pos, 0, 8)
# legend_alpha <- ifelse(seq_along(legend_breaks) %in% hide_pos, 0, 1)
# 
# legend_fill  <- rep(NA_character_, n)
# legend_color <- rep(NA_character_, n)
# legend_stroke <- rep(0.5, n)
# 
# legend_fill[hide_pos]   <- "transparent"
# legend_color[hide_pos]  <- "transparent"
# legend_stroke[hide_pos] <- 0
# 
# ggplot(data = world_map, aes(long, lat)) +
#   geom_polygon(aes(group = group)) +
#   coord_sf(xlim   = c(lon_min - 1, lon_max + 1), ylim   = c(lat_min - 1, lat_max + 1),
#   expand = FALSE) +
#   geom_point(data = museum_taxon,
#              aes(x = Longitude, y = Latitude, color = Genus_cite_italic),
#              size = 3,
#              position = position_jitter(width = .1, height = .1)) +
#   labs(x = "Longitude", y = "Latitude", color = "Genus") +
#   scale_colour_manual(
#   values = genus_colors_final,
#   breaks = genus_order_with_headers,
#   limits = names(genus_colors_final),
#   labels = function(x) lapply(x, function(i) parse(text = i))
#   ) +
#   guides(
#   color = guide_legend(
#     override.aes = list(
#       size  = legend_size,
#       alpha = legend_alpha), ncol = 4
#   )) +
#   theme(
#   plot.margin        = margin(t = .05, r = 1, b = .05, l = 1, unit = "mm"),
#   legend.key         = element_rect(fill = "transparent", color = "transparent"),
#   legend.position        = "bottom",
#   legend.title.position  = "top",
#   legend.spacing.y   = unit(0.25, "mm"),
#   legend.spacing.x   = unit(1, "mm"),
#   legend.title           = element_text(size = 16),
#   legend.text            = element_text(size = 14),
#   strip.text             = element_text(size = 16),
#   axis.title             = element_text(size = 20),
#   axis.text.x            = element_text(size = 16, angle = 60, hjust = 1),
#   axis.text.y            = element_text(size = 16)
# ) +
#   facet_wrap(~Family_cite, nrow = 1, labeller = as_labeller(family_labels))
# 
# ggsave("octopodoidea_japan.png", width = 16, height = 7.5, units = "in", dpi = 450)
# 

## ----figure, echo = FALSE, out.width = "100%", fig.cap = "Octopodoidea occurrences in Japan by family and genus."----
knitr::include_graphics("octopodoidea_japan.png")

