## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----setups, include=FALSE----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(123)

## ----install, eval = FALSE----------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("DaparToolshed")

## ----setup, message = FALSE---------------------------------------------------
library(DaparToolshed)

## ----dataset------------------------------------------------------------------
data.file <- system.file("extdata/data", "Exp1_R25_pept_100.txt", package="DaparToolshed")
data <- read.table(data.file, header=TRUE, sep="\t", as.is=TRUE, stringsAsFactors = FALSE)
  
sample.file <- system.file("extdata/data", "samples_Exp1_R25.txt", package="DaparToolshed")
sample <- read.table(sample.file, header=TRUE, sep="\t", as.is=TRUE, stringsAsFactors = FALSE)

## ----importdata, message = FALSE----------------------------------------------
obj <- createQFeatures(data = data,
  file = 'Exp1_R25_pept',
                       sample = sample,
                       indQData = 56:61,
                       keyId = "Sequence",
                       indexForMetacell = 43:48,
                       logData = TRUE,
                       force.na = TRUE,
                       typeDataset = "peptide",
                       parentProtId = "Protein_group_IDs",
                       analysis = "Pept_Data",
                       software = "maxquant")

obj

## ----metadataaccess, message = FALSE------------------------------------------
 #quantitative data tags
head(qMetacell(obj[[length(obj)]]))

 #metadata
head(SummarizedExperiment::rowData(obj[[length(obj)]]), n = 3)

## ----filters, message = FALSE-------------------------------------------------
 #create filter to remove empty lines
filter_emptyline <- FunctionFilter("qMetacellWholeLine",
                                  cmd = 'delete',
                                  pattern = 'Missing MEC')
  
 #create filter to remove contaminant
filter_contaminant <- QFeatures::VariableFilter(field = "Potential_contaminant",
                                                value = "+",
                                                condition = "==",
                                                not = TRUE)

## ----filtering, message = FALSE-----------------------------------------------
 #apply all filters and create new assay
obj <- filterFeaturesOneSE(object = obj,
                           i = length(obj),
                           name = "Filtered",
                           filters = list(filter_emptyline, filter_contaminant))

## ----removeprot, message = FALSE----------------------------------------------
 #remove proteins with no associated peptides
X <- QFeatures::adjacencyMatrix(obj[[length(obj)]])
SummarizedExperiment::rowData(obj[[length(obj)]])$adjacencyMatrix <- X[, -which(Matrix::colSums(X) == 0)]

obj

## ----normalizationmethods-----------------------------------------------------
 #list of available methods
normalizeMethods()

## ----normalization------------------------------------------------------------
obj <- normalizeFunction(obj,
                         method = "MeanCentering",
                         scaling = TRUE,
                         type = "overall")

obj

## ----normalizationplot, message = FALSE, warning=FALSE------------------------
obj1 <- obj[[length(obj)]]
obj2 <- obj[[length(obj)-1]]
protId <- DaparToolshed::idcol(obj1)

.n <- floor(0.02 * nrow(obj1))
.subset <- seq(nrow(obj1))

compareNormalizationD_HC(
  qDataBefore = SummarizedExperiment::assay(obj1),
  qDataAfter = SummarizedExperiment::assay(obj2),
  keyId = SummarizedExperiment::rowData(obj1)[, protId],
  conds = DaparToolshed::design_qf(obj)$Condition,
  n = .n,
  subset.view = .subset
)

## ----missingvalueplots, warning=FALSE-----------------------------------------
pal <- unique(GetColorsForConditions(design_qf(obj)$Condition))
pattern <- c("Missing MEC", "Missing POV")
grp <- design_qf(obj)$Condition

 #number of line with different amount of NA
metacellPerLinesHisto_HC(obj[[length(obj)]], group = grp, pattern = pattern)
 #number of line with different amount of NA per condition
metacellPerLinesHistoPerCondition_HC(obj[[length(obj)]], group = grp, pattern = pattern)

## ----missingvaluedistribplots-------------------------------------------------
hc_mvTypePlot2(obj[[length(obj)]], group = grp, pattern = pattern, pal = pal)

## ----imputation, message = FALSE----------------------------------------------
obj <- wrapperPirat(data = obj,
                     adjmat = SummarizedExperiment::rowData(obj[[length(obj)]])$adjacencyMatrix,
                     extension = "base")

obj

## ----aggregationadjmatrix-----------------------------------------------------
X <- SummarizedExperiment::rowData(obj[[length(obj)]])$adjacencyMatrix
info_pept <- getProteinsStats(X)

message("Total number of peptides : ", info_pept$nbPeptides, "\n",
    "Number of specific peptides : ", info_pept$nbSpecificPeptides, "\n",
    "Number of shared peptides : ", info_pept$nbSharedPeptides, "\n")

message("Total number of proteins : ", info_pept$nbProt, "\n",
    "Number of protein with only specific peptides : ", length(info_pept$protOnlyUniquePep), "\n",
    "Number of protein with only shared peptides : ", length(info_pept$protOnlySharedPep), "\n",
    "Number of protein with both : ", length(info_pept$protMixPep), "\n")

## ----aggregation, message = FALSE---------------------------------------------
obj <- RunAggregation(obj,
                      adjMatrix = 'adjacencyMatrix',
                      includeSharedPeptides = 'Yes_Iterative_Redistribution',
                      operator = 'Mean',
                      considerPeptides = 'allPeptides',
                      ponderation = "Global",
                      max_iter = 500
                      )

obj

## ----aggregationsummary-------------------------------------------------------
summary(SummarizedExperiment::assay(obj[["aggregated"]]))

## ----filteringprot------------------------------------------------------------
 #apply the filter to remove empty lines created
obj <- filterFeaturesOneSE(object = obj,
                           i = length(obj),
                           name = "FilterProt",
                           filters = list(filter_emptyline))

obj

## ----limma--------------------------------------------------------------------
res_pval_FC <- limmaCompleteTest(SummarizedExperiment::assay(obj[[length(obj)]]), 
                                 design_qf(obj), 
                                 comp.type = "OnevsOne")

str(res_pval_FC)

## ----foldchange, message = FALSE, warning=FALSE-------------------------------
 #logFC threshold
Foldchange_thlogFC <- 0.04
        
hc_logFC_DensityPlot(
  df_logFC = as.data.frame(res_pval_FC$logFC),
  th_logFC = Foldchange_thlogFC
  )

## ----pushpvalue---------------------------------------------------------------
pval <- unlist(res_pval_FC$P_Value)

 #push p-values for proteins with more than 60% of values derived from imputation
pval <- pushpvalue(obj,
                   pval,
                   scope = "WholeMatrix",
                   pattern = c("Imputed POV", "Imputed MEC"),
                   percent = FALSE,
                   threshold = 5,
                   operator = ">=")

message("Number of pushed p-values : ", length(which(pval > 1)), "\n")

## ----pvalcalibration----------------------------------------------------------
 #remove pushed p-values
pushedpval_ind <- which(pval > 1)
pval_nopush <- pval[-pushedpval_ind]

 #calibration plot with all methods 
calibration_all <- wrapperCalibrationPlot(vPVal = pval_nopush, 
                                          pi0Method  = "ALL")
calibration_all$pi0

## ----pvalcalibrationmethod----------------------------------------------------
 #chosen pi0
pi0 <- 1

 #calibration plot with chosen method
wrapperCalibrationPlot(vPVal = pval_nopush, 
                       pi0Method  = pi0)

 #histogram of p-values density
histPValue_HC(pval_nopush,
              bins = 50,
              pi0 = pi0
)

## ----pvalcalibrationadjpval---------------------------------------------------
 #push to 1 proteins with logFC under threshold
pval_pushfc <- pval
pval_logfc_inf_ind <- which(abs(res_pval_FC$logFC) < Foldchange_thlogFC)
pval_logfc_inf_ind <- setdiff(pval_logfc_inf_ind, pushedpval_ind)
pval_pushfc[pval_logfc_inf_ind] <- 1
 #remove pushed p-values
pval_pushfc <- pval_pushfc[-pushedpval_ind]


 #calculate adjusted p-values
adjusted_pvalues <- diffAnaComputeAdjustedPValues(
  pval_pushfc,
  pi0)

adjusted_pvalues_comp <- unlist(res_pval_FC$P_Value)
adjusted_pvalues_comp[-pushedpval_ind] <- adjusted_pvalues

## ----FDRcontrol---------------------------------------------------------------
 #define p-value threshold
pval_threshold <- 0.01
FDRcontrol_thpval <- -log10(pval_threshold)

 #get FDR
pval[pushedpval_ind] <- 1
logpval <- -log10(pval)
logpval_thpval_ind <- which(logpval >= FDRcontrol_thpval)
logpval_thpval_ind <- setdiff(logpval_thpval_ind, pval_logfc_inf_ind)
fdr <- diffAnaComputeFDR(adjusted_pvalues_comp[logpval_thpval_ind])

 #determine differentially abundant proteins
isDifferential <- isDifferential(pvalue = pval,
                                 logFC = res_pval_FC$logFC,
                                 thpvalue = FDRcontrol_thpval,
                                 thlogFC = Foldchange_thlogFC)
NbSignificant <- length(which(isDifferential == 1))

message("pvalue threshold : ", pval_threshold, "\n",
    "logpvalue threshold : ", FDRcontrol_thpval, "\n",
    "FDR : ", round(100*fdr, 2), "%\n",
    "Number significant proteins : ", NbSignificant, "\n",
    "Number of expected false discovery : ", fdr*NbSignificant, "\n")

## ----volcanoplot--------------------------------------------------------------
 #create dataframe for volcano plot
df <- data.frame(
  x = unlist(res_pval_FC$logFC),
  y = logpval,
  index = as.character(rownames(obj[[length(obj)]]))
  )
tooltip <- "proteinId"
df <- cbind(df, SummarizedExperiment::rowData(obj[[length(obj)]])[, tooltip, drop = FALSE])
colnames(df) <- gsub(".", "_", colnames(df), fixed = TRUE)
if (ncol(df) > 3) {
  colnames(df)[4:ncol(df)] <- paste0("tooltip_",
                                      colnames(df)[4:ncol(df)])
}
cond <- unique(design_qf(obj)$Condition)

 #volcano plot
diffAnaVolcanoplot_rCharts(
  df,
  th_pval = FDRcontrol_thpval,
  th_logfc = Foldchange_thlogFC,
  conditions = cond
)

## ----DAassay------------------------------------------------------------------
 #create new assay with all values of interest
obj <- QFeatures::addAssay(obj, obj[[length(obj)]], 'DifferentialAnalysis')
SummarizedExperiment::rowData(obj[[length(obj)]])$pval <- unlist(res_pval_FC$P_Value)
SummarizedExperiment::rowData(obj[[length(obj)]])$logpval <- logpval
SummarizedExperiment::rowData(obj[[length(obj)]])$logFC <- unlist(res_pval_FC$logFC)

SummarizedExperiment::rowData(obj[[length(obj)]])$adjusted_pval <- adjusted_pvalues_comp

SummarizedExperiment::rowData(obj[[length(obj)]])$isDifferential <- isDifferential

obj

## ----sessioninfo, echo=FALSE--------------------------------------------------
sessionInfo()