#Checks if a given number is prime or not.
check_prime <- function(n) {

  n <- abs(n)

  if(n < 2) {

    return(FALSE)
  } else if(n == 2) {

    return(TRUE)
  } else if(sqrt(n) < 3){

    return(n %% 2 != 0)
  } else {

    for(i in seq(3, as.integer(sqrt(n)), by=2)) {

      if(n %% i == 0) {

        return(FALSE)
      }
    }
  }

  return(TRUE)
}

#Creates a circulant matrix row-by-row given the first row.
circulant_matrix <- function(first.row) {

  size <- length(first.row)

  circulant.matrix <- matrix(nrow=size, ncol=size)
  circulant.matrix[1,] <- first.row

  if(size >= 2) {

    for(i in 2:size) {

      circulant.matrix[i,] <- c(circulant.matrix[i-1,size], circulant.matrix[i-1,1:(size-1)])
    }
  }

  return(circulant.matrix)
}

#Constructs a Hadamard matrix using Paley's method.
paley_construction <- function(size, type) {

  if(type == 1) {

    prime <- size - 1
  } else if (type == 2) {

    prime <- size/2 - 1
  }

  Q <- outer(0:(prime-1), 0:(prime-1), `-`)
  Q[(Q %% prime) %in% ((1:(prime-1))^2 %% prime)] <- 1
  Q[!((Q %% prime) %in% ((1:(prime-1))^2 %% prime))] <- -1
  diag(Q) <- 0

  if(type == 1) {

    H <- rbind(1, cbind(-1, Q + diag(prime)))
  } else if(type == 2) {

    H.template <- rbind(c(0, rep(1,prime)), cbind(1, Q))
    H <- matrix(nrow=2*(prime+1), ncol=2*(prime+1))

    for(i in 1:(prime+1)) {

      for(j in 1:(prime+1)) {

        if(H.template[i,j] == 0) {

          H[(2*i-1):(2*i), (2*j-1):(2*j)] <- matrix(c(1,-1,-1,-1), nrow=2, ncol=2)
        } else if(H.template[i,j] == 1) {

          H[(2*i-1):(2*i), (2*j-1):(2*j)] <- matrix(c(1,1,1,-1), nrow=2, ncol=2)
        } else {

          H[(2*i-1):(2*i), (2*j-1):(2*j)] <- matrix(c(-1,-1,-1,1), nrow=2, ncol=2)
        }
      }
    }
  }

  return(H)
}

#Constructs a Hadamard matrix using Williamson's method.
williamson_construction <- function(size) {

  sequences <- williamson.sequences[williamson.sequences$order == size/4, ,drop=FALSE]

  A <- circulant_matrix(sequences$value[sequences$matrix == "A"])
  B <- circulant_matrix(sequences$value[sequences$matrix == "B"])
  C <- circulant_matrix(sequences$value[sequences$matrix == "C"])
  D <- circulant_matrix(sequences$value[sequences$matrix == "D"])

  H <- rbind(cbind(A, B, C, D),
             cbind(-B, A, -D, C),
             cbind(-C, D, A, -B),
             cbind(-D, -C, B, A))
  return(H)
}

#Constructs a Hadamard matrix using Sylvester's method.
sylvester_construction <- function(size) {

  H <- generate_hadamard_matrix(size/2)

  if(!is.null(H)) {

    H <- rbind(cbind(H, H), cbind(H, -H))
  }

  return(H)
}

#Constructs a Hadamard matrix of a specified size, returns NULL if it doesn't exist.
generate_hadamard_matrix <- function(size) {

  if(size <= 0) {

    stop("Hadamard matrix size must be a positive integer.")
  }

  if(size %% 4 != 0) {

    stop("Hadamard matrix size must be a multiple of 4.")
  }

  if(check_prime(size - 1)) {

    hadamard.matrix <- paley_construction(size, type=1)
  } else if(((size/2) %% 4 == 2) && check_prime(size/2 - 1)) {

    hadamard.matrix <- paley_construction(size, type=2)
  } else if((size/4) %in% williamson.orders) {

    hadamard.matrix <- williamson_construction(size)
  } else if(size %% 8 == 0) {

    hadamard.matrix <- sylvester_construction(size)
  } else {

    hadamard.matrix <- NULL
  }

  return(hadamard.matrix)
}

#Performs post-stratification adjustment to replicate weights
poststrat_adjust <- function(weight.data,
                             cell,
                             base.weight,
                             rep.weights,
                             is.brr) {

  #Post-stratification cell totals
  cell.totals <- aggregate(weight.data[,c(base.weight, rep.weights)],
                           by=weight.data[,cell,drop=FALSE],
                           sum)

  #Check for any cells with zero weight (can happen with a Fay factor of 1)
  if(any(cell.totals[,rep.weights] == 0)) {

    if(is.brr) {

      stop("Post-stratification cell(s) have zero weight in at least one replicate. Please reconfigure strata/PSUs or use a Fay factor other than 1.")
    } else {

      stop("Post-stratification cell(s) have zero weight in at least one replicate. Please reconfigure strata/PSus.")
    }
  }

  #Post-stratification adjustment
  cell.ratios <- paste0("Ratio_", 1:length(rep.weights))
  cell.totals[,cell.ratios] <- cell.totals[,base.weight]/cell.totals[,rep.weights]

  weight.data <- merge(weight.data,
                       cell.totals[,c(cell, cell.ratios)],
                       by=cell)
  weight.data[,rep.weights] <- weight.data[,rep.weights]*weight.data[,cell.ratios]

  return(weight.data)
}

#Rounds weights in each post-stratification cell to nearest integer, remainders are balanced to reduce rounding error.
integerize_weights <- function(weight.data,
                               weights,
                               cell=NULL) {

  weight.matrix <- as.matrix(weight.data[,weights])
  num.weights <- ncol(weight.matrix)
  num.obs <- nrow(weight.matrix)

  if(!is.null(cell)) {

    cell.values <- weight.data[,cell,drop=TRUE]
  } else {

    cell.values <- rep(1, num.obs)
  }

  cell.levels <- unique(cell.values)
  num.cells <- length(cell.levels)

  int.weights <- matrix(nrow=num.obs, ncol=num.weights)

  raw.sum <- matrix(0, nrow=num.cells, ncol=num.weights)
  int.sum <- matrix(0, nrow=num.cells, ncol=num.weights)

  for(i in seq_len(num.obs)) {

    cell.num <- which(cell.levels == cell.values[i])

    raw.sum[cell.num,] <- raw.sum[cell.num,] + weight.matrix[i,]
    int.weights[i,] <- round(raw.sum[cell.num,]) - int.sum[cell.num,]
    int.sum[cell.num,] <- int.sum[cell.num,] + int.weights[i,]
  }

  weight.data[,weights] <- int.weights

  return(weight.data)
}

#' Generate BRR weights for a survey dataset
#'
#' @description This function creates balanced repeated replication (BRR)
#'   weights for variance estimation in survey data.
#'
#' @section About BRR: Balanced repeated replication (BRR) is a variance
#'   estimation technique used for stratified survey data. Like ordinary
#'   bootstrap, BRR uses a set of replicates resampled from the original data.
#'   BRR differs from bootstrap in that BRR replicates are structured rather
#'   than randomly sampled. In order for BRR to work properly, the survey data
#'   must be structured so that each strata has exactly two primary sampling
#'   units (PSUs). A Hadamard matrix is used to structure each replicate so that
#'   one PSU from each strata is used per replicate. The size of the Hadamard
#'   matrix determines the number of BRR replicate weights, and it is usually
#'   close to the number of strata. This means that in BRR is often more
#'   efficient than bootstrap in datasets where it can be used.
#'
#' @section Fay's method: Standard BRR uses only half of the PSUs in the dataset
#'   for each replicate. Fay's method uses the full dataset for each replicate
#'   but adjusts the weighting of each PSU based on a value between 0 and 1
#'   called the Fay factor 'f'. If a PSU would be taken out of the sample in
#'   standard BRR, it is instead given a weight of 1 - f when using Fay's
#'   method. Likewise, PSUs that would be kept in the sample are instead given a
#'   weight of 1 + f. A Fay factor of 1 is equivalent to using standard BRR.
#'
#' @section Post-stratification: The design of a survey study usually will have
#'   expected proportions that different groups should have in the full
#'   population. Post-stratification adjustment is used to adjust observation
#'   weights so that the influence of each group (called a post-stratification
#'   cell) in an analysis is proportional to the group's actual proportion in
#'   the population.
#'
#'
#' @param input.data A data frame.
#' @param id Variable that identifies each subject.
#' @param strata Variable that identifies the strata that an observation is in.
#' @param psu Variable that identifies which PSU within a strata that an
#'   observation is in. There must be exactly two unique values of this variable
#'   for each value of `strata`.
#' @param cell Variable that identifies which post-stratification cell that an
#'   observation is in. Used to perform post-stratification adjustment of
#'   replicate weights. If `NULL`, no post-stratification adjustment is
#'   performed. (default = `NULL`).
#' @param weight Base weighting variable for the dataset. If `NULL`, all
#'   observations are treated as having an equal weight of 1. (default = `NULL`)
#' @param fay.factor A number between 0 and 1 that adjusts the weight of PSUs
#'   for Fay's method. The default value of 1 performs standard BRR.
#'
#' @returns A `data.frame` containing all of the variables in `input.data` plus
#'   the following columns:
#' * `RepWt_0`: The base weight.
#' * `RepWt_1`-`RepWt_N`: BRR weights for replicates 1 to N.
#'
#'   Weights are integerized for compatibility with modeling functions.
#'
#' @export
#'
#' @examples
#' brr.data <- brr_weights(input.data=nhcvd,
#'                         id="SEQN",
#'                         strata="SDMVSTRA",
#'                         psu="SDMVPSU",
#'                         cell="PSCELL",
#'                         weight="WTDRD1",
#'                         fay.factor=0.7)
#'
#' #base weight and first 8 BRR replicate weights
#' head(brr.data[,paste0("RepWt_", 0:8)])
brr_weights <- function(input.data,
                        id,
                        strata,
                        psu,
                        cell=NULL,
                        weight=NULL,
                        fay.factor=1) {

  #Subset to unique subjects and sort by strata and PSU
  subject.data <- input.data[!duplicated(input.data[,id]),]
  subject.data <- subject.data[order(subject.data[,strata,drop=TRUE], subject.data[,psu,drop=TRUE]),]

  psu.levels <- unique(subject.data[,c(strata, psu)])
  num.psu <- nrow(psu.levels)
  num.strata <- nrow(unique(psu.levels[,strata,drop=FALSE]))

  #Check that each strata has exactly 2 PSUs, throw error otherwise
  psu.counts <- aggregate(psu.levels[,psu,drop=FALSE],
                          by=psu.levels[,strata,drop=FALSE],
                          length)

  if(any(psu.counts[,psu] != 2)) {

    stop("All strata must have exactly 2 PSUs.")
  }

  #Generate Hadamard matrix
  hadamard.matrix <- NULL
  hadamard.size <- num.strata + 4 - num.strata %% 4
  while(is.null(hadamard.matrix)) {

    hadamard.matrix <- generate_hadamard_matrix(hadamard.size)

    if(is.null(hadamard.matrix)) {

      hadamard.size <- hadamard.size + 4
    }
  }

  #drop columns of all 1s or -1s
  hadamard.matrix <- hadamard.matrix[,apply(hadamard.matrix, 2, function(column) min(column) != max(column))]

  #create BRR weight multipliers for each PSU and strata
  num.reps <- nrow(hadamard.matrix)

  brr.factors <- matrix(nrow=num.psu, ncol=num.reps)
  brr.factors[seq_len(num.psu) %% 2 == 1,] <- 1 + fay.factor*t(hadamard.matrix[,1:num.strata])
  brr.factors[seq_len(num.psu) %% 2 == 0,] <- 1 - fay.factor*t(hadamard.matrix[,1:num.strata])
  colnames(brr.factors) <- paste0("RepWt_", 1:num.reps)

  #Calculate BRR weights using the multipliers
  brr.data <- merge(subject.data,
                    data.frame(psu.levels, brr.factors),
                    by=c(strata, psu))

  if(!is.null(weight)) {

    brr.data[,"RepWt_0"] <- brr.data[,weight]
  } else {

    brr.data[,"RepWt_0"] <- rep(1, nrow(brr.data))
  }
  brr.data[,paste0("RepWt_", 1:num.reps)] <- brr.data[,paste0("RepWt_", 1:num.reps)]*brr.data[,"RepWt_0"]

  #Post-stratification adjustment
  if(!is.null(cell)) {

    brr.data <- poststrat_adjust(weight.data=brr.data,
                                 cell=cell,
                                 base.weight="RepWt_0",
                                 rep.weights=paste0("RepWt_", 1:num.reps),
                                 is.brr=TRUE)
  }

  #Integerize weights
  brr.data <- integerize_weights(weight.data=brr.data,
                                 weights=paste0("RepWt_", 0:num.reps),
                                 cell=cell)

  #Merge weights into full dataset
  input.data[,paste0("RepWt_", 0:num.reps)] <- NULL
  output.data <- merge(input.data,
                       brr.data[,c(id, paste0("RepWt_", 0:num.reps))],
                       by=id)

  return(output.data)
}


#' Generate bootstrap weights for a survey dataset
#'
#' @description This function creates bootstrap weights for variance estimation
#'   in survey data.
#'
#' @section About Bootstrap: Bootstrapping is a resampling technique that can be
#'   used for variance estimation in survey data. Each bootstrap replicate
#'   weight set is equivalent to sampling the primary sampling units (PSUs) in
#'   the original dataset with replacement. The strata can have an arbitrary
#'   number of PSUs within them. Often, each individual subject is treated as
#'   its own PSU. Many replicates need to be performed to provide accurate
#'   estimates of the variance. In practice, this is generally around 200-500
#'   replicates. The ideal number can vary depending on the dataset, and survey
#'   datasets will sometimes specify how many bootstrap replicates are
#'   recommended.
#'
#' @inheritSection brr_weights Post-stratification
#'
#' @inheritParams brr_weights
#' @param psu Variable that identifies which PSU within a strata that an
#'   observation is in.
#' @param num.reps Number of bootstrap replicate weights. (default = 200).
#' @param boot.seed Integer starting seed for the random number generator. If
#'   `NULL`, uses a randomly generated integer from -10^7 to 10^7, exclusive.
#'   (default = `NULL`)
#'
#' @returns A `data.frame` containing all of the variables in `input.data` plus
#'   the following columns:
#'
#' * `RepWt_0`: The base weight.
#' * `RepWt_1`-`RepWt_N`: BRR weights for replicates 1 to N.
#'
#'   Weights are integerized for compatibility with modeling functions.
#'
#'   The output also contains the following attribute:
#' * `boot.seed`: The random number generator seed used to generate the replicate weights.
#'
#' @export
#'
#' @examples
#' boot.data <- boot_weights(input.data=sim_g_whole,
#'                           id="ID",
#'                           strata="STRATA",
#'                           psu="ID",
#'                           num.reps=25,
#'                           boot.seed=999)
#'
#' #base weight and first 8 bootstrap replicate weights
#' head(boot.data[,paste0("RepWt_", 0:8)])
boot_weights <- function(input.data,
                         id,
                         strata,
                         psu,
                         cell=NULL,
                         weight=NULL,
                         num.reps=200,
                         boot.seed=NULL) {

  if(num.reps < 1) {

    stop("The number of bootstrap replicates must be at least 1.")
  }

  #Set seed
  if(is.null(boot.seed)) {

    boot.seed <- as.integer(runif(1, min=-0.5, max=0.5)*2*10^7)
  }

  set.seed(boot.seed)

  #Subset to unique subjects and sort by strata and PSU
  subject.data <- input.data[!duplicated(input.data[,id]),]
  subject.data <- subject.data[order(subject.data[,strata,drop=TRUE], subject.data[,psu,drop=TRUE]),]

  psu.levels <- unique(subject.data[,c(strata, psu)])
  num.psu <- nrow(psu.levels)

  strata.levels <- unique(psu.levels[,strata,drop=TRUE])
  num.strata <- length(strata.levels)

  #Number of bootstrap hits for each PSU and strata
  num.hits <- matrix(nrow=num.psu, ncol=num.reps)
  size.adjust <- numeric(num.psu)

  for(s in 1:num.strata) {

    psu.in.strata <- which(psu.levels[,strata, drop=TRUE] == strata.levels[s])
    num.psu.strata <- length(psu.in.strata)

    num.hits[psu.in.strata,] <- replicate(num.reps,
                                          table(factor(sample(1:num.psu.strata, num.psu.strata - 1, replace=TRUE), levels=1:num.psu.strata)))
    size.adjust[psu.in.strata] <- num.psu.strata/(num.psu.strata - 1)
  }

  colnames(num.hits) <- paste0("RepWt_", 1:num.reps)

  #Calculate bootstrap weights using the number of hits
  boot.data <- merge(subject.data,
                     data.frame(psu.levels, num.hits),
                     by=c(strata, psu))

  if(!is.null(weight)) {

    boot.data[,"RepWt_0"] <- boot.data[,weight]
  } else {

    boot.data[,"RepWt_0"] <- rep(1, nrow(boot.data))
  }
  boot.data[,paste0("RepWt_", 1:num.reps)] <- boot.data[,paste0("RepWt_", 1:num.reps)]*size.adjust*boot.data[,"RepWt_0"]

  #Post-stratification adjustment
  if(!is.null(cell)) {

    boot.data <- poststrat_adjust(weight.data=boot.data,
                                  cell=cell,
                                  base.weight="RepWt_0",
                                  rep.weights=paste0("RepWt_", 1:num.reps),
                                  is.brr=FALSE)
  }

  #Integerize weights
  boot.data <- integerize_weights(weight.data=boot.data,
                                  weights=paste0("RepWt_", 0:num.reps),
                                  cell=cell)

  #Merge weights into full dataset
  input.data[,paste0("RepWt_", 0:num.reps)] <- NULL
  output.data <- merge(input.data,
                       boot.data[,c(id, paste0("RepWt_", 0:num.reps))],
                       by=id)

  attr(output.data, "boot.seed") <- boot.seed

  return(output.data)
}

