## ----echo = FALSE-------------------------------------------------------------
# Set the output line width to 100 characters for this document. options()
# returns the previous settings, which are kept so they can be restored.
old_opts <- options(width = 100L)
# Register the restore of the saved options.
# NOTE(review): on.exit() is called at top level here rather than inside a
# function, so when the restore actually fires depends on how this script is
# evaluated (knitr chunk vs. source()) -- confirm it behaves as intended.
on.exit(options(old_opts), add = TRUE)

## ----install, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE-------------

library(ume)
library(pander)

# Chunk defaults for the whole vignette: let knitr tidy the displayed code
# and wrap it at ~60 characters.
knitr::opts_chunk$set(
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 60)
)

# Only the demo library `lib_demo` shipped with ume is used in this vignette.
# data() takes the data set name itself; the package is passed separately via
# `package =` (a `pkg::name` expression is not a valid data set name).
data("lib_demo", package = "ume")


## ----eval=FALSE---------------------------------------------------------------
# 
# pl <- as_peaklist("your_path_to.csv")
# 

## ----example_short1, eval = F, warning=FALSE----------------------------------
# 
#   mfd <- ume_assign_formulas(pl = peaklist_demo,
#                              formula_library = lib,
#                              pol = "neg",
#                              ma_dev = 0.5,
#                              remove_isotopes = T)

## ----example_short2, eval = F, warning=FALSE----------------------------------
# 
#   mfd_filt <- ume_filter_formulas(
#     mfd = mfd,
#     remove_isotopes = TRUE,
#     normalization = "bp",
#     norm_int_min = 0.5,
#     blank_file_ids = 1,
#     blank_prevalence = 0.5,
#     dbe_o_max = 10,
#     oc_min = 0.2, oc_max = 1.2,
#     c_iso_check = TRUE,
#     dbe_max = 30,
#     p_min = 0, p_max = 0,
#     mz_min = 150, mz_max = 650
#     )

## ----function_arguments, eval = F, warning = F, echo = F----------------------
#   args(ume::filter_mf_data)
#   args(ume::filter_int)
# 
#  #All available filter arguments:
#   help(ume_filter_formulas)
# 

## ----example_long, eval = F, echo = TRUE--------------------------------------
# # Step 1: Assign formulas (checks the peaklist format and calculates neutral masses and mass accuracy)
#   # calc_neutral_mass() and calc_ma_abs()
#   mfd <- assign_formulas(pl = ume::peaklist_demo, formula_library = ume::lib_demo,
#                         pol = "neg", ma_dev = 0.5, verbose = TRUE)
# 
# # Step 2: Verify the existence of the major isotope signals and their magnitudes
#   mfd <- eval_isotopes(mfd = mfd, remove_isotopes = TRUE, verbose = TRUE)
# 
# # Step 3: Calculate evaluation parameters
#   mfd <- calc_eval_params(mfd = mfd, verbose = TRUE)
# 
# # Step 4: Add known classification for formulas
#   # to do: the categories should be listed in one column containing the category assignment
#   mfd <- add_known_mf(mfd = mfd)
# 
# # Step 5: Remove all formulas that occur in one or more blank analyses
#   # The demo peaklist contains one blank spectrum named "Blank" (file_id = 1)
#   # This removes all molecular formulas recorded in the blank from the entire dataset
#   mfd <- remove_blanks(mfd = mfd, blank_file_ids = 1, blank_prevalence = 0)
# 
# # Step 6: Filter formula table according to evaluation parameters (generated in step 3)
#   mfd_filt <- filter_mf_data(mfd = mfd,
#                              select_file_ids = 2:5,
#                              dbe_o_max = 10,
#                              oc_min = 0.2,
#                              oc_max = 1.2,
#                              verbose = TRUE)
# 
# # Step 7: Normalize intensities
#   mfd_filt <- calc_norm_int(mfd = mfd_filt, normalization = "bp", verbose = TRUE)
# 
# # Step 8: Filter by (relative) peak magnitude (in this case: >= 5 percent base peak intensity)
#   mfd_filt <- filter_int(mfd = mfd_filt, norm_int_min = 0.5, verbose = TRUE)
# 
# # Step 9: Re-normalize intensities after the intensity filter
#   mfd_filt <- calc_norm_int(mfd = mfd_filt, normalization = "bp", verbose = TRUE)
# 
# # Step 10: Order the columns of the results table
#   mfd_filt <- order_columns(mfd = mfd_filt)
# 

## ----eval = FALSE, warning=FALSE----------------------------------------------
# 
# # Mass spectrum
#   uplot_ms(pl = ume::peaklist_demo, label = "file")
# 
# # Summary statistics
#   calc_data_summary(mfd = ume::mf_data_demo)
# 
# # Mass accuracy
#   uplot_freq_ma(mfd = ume::mf_data_demo)
# 
# # Element frequency
#   uplot_freq(mfd = ume::mf_data_demo, var = "14N")
# 
# # van Krevelen
#   uplot_vk(mfd = ume::mf_data_demo, size_dots = 3)
# 
# # Precision isotope abundance:
#   uplot_isotope_precision(mfd = ume::mf_data_demo, z_var = "nsp_tot", tf = F)
# 

## ----eval = F, warning=FALSE--------------------------------------------------
# output_recal <- calc_recalibrate_ms(
#   pl = peaklist_demo[file != "Blank"],
#   calibr_list = "marine_dom",
#   pol = "neg",
#   min_no_calibrants = 3,
#   ma_dev = 1,
#   formula_library = lib_demo
# )
# 
# summary(output_recal)
# output_recal$cal_stats # summary statistics for each file_id in peaklist
# 
# # Result plots
#   output_recal$fig_box_before
#   output_recal$fig_box_after
#   output_recal$fig_hist_before
#   output_recal$fig_hist_after
# 
# # The re-calibrated peaklist is available via
#   output_recal$pl
# 
# # It can directly be used to start a new formula assignment process (see above):
#   mfd_recal <- ume::ume_assign_formulas(
#     pl = output_recal$pl,
#     formula_library = ume::lib_demo,
#     pol = "neg",
#     ma_dev = 1
#   )
# 
# # Automated mass accuracy sub-setting can be obtained using the column "ppm_filt".
# # It is based on the quantiles 97.5% and 2.5% of all CHO formulas assigned.
# 
#   mfd_recal <- mfd_recal[abs(ppm) <= ppm_filt]
# 
#   uplot_freq_ma(mfd_recal)
# 

## ----example_peaklist, results = "asis", warning=FALSE, echo = FALSE----------
  # Render the first three entries of the demo peak list as a pandoc table
  # (rows, assuming peaklist_demo is a data.table -- TODO confirm).
  pander::pandoc.table(peaklist_demo[seq_len(3L)], digits = 8)

## ----results = "asis", echo = FALSE-------------------------------------------
# Preview the first three rows of `masses`, leaving out the "valence2" column.
cols <- local({
  all_cols <- names(masses)
  all_cols[!(all_cols %in% "valence2")]
})
# `with = FALSE` makes data.table treat `cols` as a vector of column names.
pander::pandoc.table(masses[seq_len(3L), cols, with = FALSE], digits = 8)

## ----results = "asis", echo = FALSE-------------------------------------------

# Render the first three entries of the demo formula library as a pandoc table
# (rows, assuming lib_demo is a data.table -- TODO confirm).
pander::pandoc.table(ume::lib_demo[seq_len(3L)], digits = 10)


## ----eval = F-----------------------------------------------------------------
# 
#   ume_custom_library <- create_ume_formula_library(max_mass = 50, max_formula = "C5H12O10")
# 

## ----eval = F, echo = FALSE---------------------------------------------------
# 
# ## 5. UME core functions
# #**(documentation to be expanded)**
# 
# ### Double bond equivalent (DBE)
# 
# # Calculates DBE for a given formula. Uses isotope masses and element valences defined in *masses.rda*.
# 
# 

## ----function_examples--------------------------------------------------------

# Calculate the double bond equivalent (DBE) for a molecular formula
calc_dbe("C2H4")

# Nominal mass; a vector of formulas can be passed in one call
calc_nm(c("C2[13C]H4", "C2H4"))

# Exact mass of a formula written with an explicit isotope ([13C])
calc_exact_mass("C2[13C]H4")

# Neutral mass for (de-) protonated ions; `pol` gives the ion polarity
calc_neutral_mass(123.1241, pol = "neg")

# Formula to table: convert a formula string and print the result
dt <- convert_molecular_formula_to_data_table("C2[13C]H4")
dt

# Table to formula: pass only the 12C, 13C and 1H columns of `dt` back
convert_data_table_to_molecular_formulas(dt[, .(`12C`, `13C`, `1H`)])


## ----installation, eval=FALSE-------------------------------------------------
# 
# # Local installation from tarball
# # Run this first if you have previously installed the UME package:
# detach("package:ume", unload = TRUE)
# .rs.restartR()
# 
# # Install from tarball (adjust your path accordingly)
# utils::install.packages(
#   "your_path_to/ume.tar.gz",
#   repos = NULL,
#   type = "source"
# )
# 

