lphom function

The function lphom(votes_election1, votes_election2, new_and_exit_voters = “regular”, structural_zeros = NULL, verbose = T) implements the algorithm proposed in: Romero, R, Pavía, JM, Martín, J and Romero G (2020) “Assessing uncertainty of voter transitions estimated from aggregated data. Application to the 2017 French presidential election”, Journal of Applied Statistics, online available. DOI: 10.1080/02664763.2020.1804842.

lphom <- function(votes_election1, votes_election2,
                  new_and_exit_voters = c("regular", "raw", "simultaneous", "full", "gold"),
                  structural_zeros = NULL, verbose = F){
    
    # Loading package lpSolve
    if (!require(lpSolve)) install.packages("lpSolve", repos = "http://cran.rstudio.com")
        require(lpSolve)
    
    # inputs
    inputs <- list("votes_election1" = votes_election1, "votes_election2" = votes_election2,
                   "new_and_exit_voters" = new_and_exit_voters, "structural_zeros" = structural_zeros,
                   "verbose" = verbose)
    
    # Data conditions
    x = as.matrix(votes_election1)
    y = as.matrix(votes_election2)
    if (nrow(x) != nrow(y))
       stop('The number of spatial units is different in origin and destination.')
    new_and_exit_voters = new_and_exit_voters[1]
    if (new_and_exit_voters %in% c("simultaneous", "full", "gold")){
        if (!identical(round(rowSums(x)), round(rowSums(y)))){
            texto <- paste0('The number of voters (electors) in Election 1 and',
                            'Election 2 differ in at least a territorial unit. This is not ',
                            'allowed in a \"', new_and_exit_voters, ' scenario \".')
        stop(texto)
    }
    }
    if (min(x,y) < 0) stop('Negative values for voters (electors) are not allowed')
    
    # Data preparation
    net_entries = net_exits = T
    tt = sum(x)
    if (new_and_exit_voters %in% c("regular", "raw")){
        NET_ENTRIES = NET_EXITS = rep(0,nrow(x))
        x = cbind(x, NET_ENTRIES); y = cbind(y, NET_EXITS)

    # Estimation of net entries/exits in the election census
    d = rowSums(y) - rowSums(x)
    if (any(d != 0)) {
        th = sum(d[d > 0])
        te = -sum(d[d < 0])
        message(paste0('*********************WARNING*********************\n',
       'You are in a \"', new_and_exit_voters, '\" scenario.\n',
       'The sums (by row) of origin and destination data differ. ',
       'It is, for at least a spatial unit, the total number ',
       'of electors in both elections is not the same. \n',
       '\n To guarantee the matching: A new category of census entries ',
       '(NET_ENTRIES) is included in the origin election and ',
       'a new category of census exits (NET_EXITS) ',
       'is also included in the destination election.\n',
       '\n %NET_ENTRIES=',100*th/tt,'% %NET_EXITS=',100*te/tt,'%\n',
       '\n If NET_ENTRIES and/or NET_EXITS are really small, less than 1% ',
       'in all units, their results will not be displayed\n',
       '\n See paper "Assessing uncertainty of voter transitions estimated from',
       ' aggregated data. Application to 2017 French presidential elections" for details. \n',
       '*************************************************'))
        x[,ncol(x)] = d*(d > 0)
        y[,ncol(y)] = -d*(d < 0)
    }
    
   # Net entries and exits
     if (sum(x[,ncol(x)]) == 0){
       net_entries = F
       x = x[,-ncol(x)]
     }
     if (sum(y[,ncol(y)]) == 0){
       net_exits = F
       y = y[,-ncol(y)]
     }
   }
    # Parameters
    I = nrow(x); J = ncol(x); K = ncol(y); JK = J*K; IK = I*K
    # Names of election options
    names1 = colnames(x); names2 = colnames(y)
    # Constraints sum(pjk)=1
    a1 = matrix(0, J, JK)
    for (j in 1:J) {
        a1[j,((j-1)*K+1):(j*K)]=1
    }
    a1 = cbind(a1, matrix(0, J, 2*IK))
    b1 = rep(1,J)
    # Constraints total match for K parties
    xt = colSums(x)
    yt = colSums(y)
    at = matrix(0, K, JK+2*IK)
    for (j in 1:J){
        at[,((j-1)*K+1):(j*K)] = diag(xt[j], K)
    }
    bt = yt
    # Constraints to match votes in the I spatial units and K parties
    ap = matrix(0, 0, JK+2*IK)
    bp = NA
    for (i in 1:I) {
        ai = matrix(0, K, 0)
        for (j in 1:J) {ai = cbind(ai,diag(x[i,j],K))}
        ai = cbind(ai,matrix(0,K,2*IK))
        bi= y[i,]
        ap = rbind(ap,ai)
        bp = c(bp,bi)
    }
    bp = bp[-1]
    for (f in 1:IK) {ap[f,JK+c(2*f-1,2*f)] = c(1,-1)}
    # Joining the three sets of constraints
    ae = rbind(a1,at,ap)
    be = c(b1,bt,bp)
    # Raw scenario. Constraints pjk(J, K)=0 & pjk(j,K) constant, related,
    # respectively, to net entries and net exits.
    if (new_and_exit_voters == "raw" & net_exits){
        if (net_entries){
            pb = yt[K]/sum(xt[1:(J-1)])
            ab = matrix(0,J,JK+2*IK)
            bb = rep(0,J)
            for (j in 1:J){
                ab[j,j*K] = 1
                bb[j]=pb*(j<J)
            }
            ae = rbind(ae,ab)
            be = c(be,bb)
        } else {
            pb = yt[K]/sum(xt[1:J])
            ab = matrix(0,J,JK+2*IK)
            bb = rep(0,J)
            for (j in 1:J){
                ab[j,j*K] = 1
                bb[j]=pb
            }
            ae = rbind(ae,ab)
            be = c(be,bb)
        }
    }
    # Regular scenario. Constraint pjk(J, K)= pik(J-1,K) = 0 related to new young
    # voters and net entries and pjk(j,K) constant related to net exits.
    if (new_and_exit_voters == "regular" & net_exits){
        if (net_entries){
            pb = yt[K]/sum(xt[1:(J-2)])
            ab = matrix(0,J,JK+2*IK)
            bb = rep(0,J)
            for (j in 1:J){
                ab[j,j*K] = 1
                bb[j]=pb*(j<(J-1))
            }
            ae = rbind(ae,ab)
            be = c(be,bb)
        } else {
            pb = yt[K]/sum(xt[1:(J-1)])
            ab = matrix(0,J,JK+2*IK)
            bb = rep(0,J)
            for (j in 1:J){
                ab[j,j*K] = 1
                bb[j]=pb*(j<J)
            }
            ae = rbind(ae,ab)
            be = c(be,bb)
        }
    }
    # Full scenario. Constraint pjk(J, K)= pik(J-1,K) = 0 related to new young
    # voters and immigrants and pjk(j,K) constant related to exits.
    if (new_and_exit_voters == "full"){
        pb = yt[K]/sum(xt[1:(J-2)])
        ab = matrix(0,J,JK+2*IK)
        bb = rep(0,J)
        for (j in 1:J){
            ab[j,j*K] = 1
            bb[j]=pb*(j<J-1)
        }
        ae = rbind(ae,ab)
        be = c(be,bb)
    }
    # Gold scenario. Constraints pjk(J, K-1) = pik(J,K) = pjk(J-1, K-1) =
    # = pik(J-1,K) =0 related to new young voters and immigrants and
    # pjk(j,K-1) and pjk(j,K) constant related to exits.
    if (new_and_exit_voters == "gold"){
    # Column K-1
        pb = yt[K-1]/sum(xt[1:(J-2)])
        ab = matrix(0,J,JK+2*IK)
        bb = rep(0,J)
        for (j in 1:J){
            ab[j,(K-1)+(j-1)*K] = 1
            bb[j]=pb*(j<(J-1))
        }
        ae = rbind(ae,ab)
        be = c(be,bb)
        # Column K
        pb = yt[K]/sum(xt[1:(J-2)])
        ab = matrix(0,J,JK+2*IK)
        bb = rep(0,J)
        for (j in 1:J){
            ab[j,j*K] = 1
            bb[j]=pb*(j<(J-1))
        }
        ae = rbind(ae,ab)
        be = c(be,bb)
    }
    # Structural zero restrictions introduced by the user
    if (length(structural_zeros) > 0){
        ast = matrix(0, length(structural_zeros), JK+2*IK)
        bst = rep(0, length(structural_zeros))
        for (i in 1:length(structural_zeros)){
            ast[i, K*(structural_zeros[[i]][1]-1)+structural_zeros[[i]][2]] = 1
        }
        ae = rbind(ae,ast)
        be = c(be,bst)
    }
    # Objective function, to minimize
    f = c(rep(0,JK), rep(1,2*IK))
    # Solution
    sol=suppressWarnings(lpSolve::lp('min', f, ae, rep('=', length(be)), be))
    z = sol$solution
    pjk = matrix(z[1:JK], J, K, TRUE, dimnames = list(names1,names2))
    e = z[(JK+1):(JK+2*IK)]
    e = matrix(e,2,IK)
    e = e[1,] - e[2,]
    eik = t(matrix(e,K,I))
    colnames(eik) = names2
    rownames(eik) = rownames(x)
    EHet = eik
    pkj = matrix(0, K, J, dimnames=list(names2,names1))
    for (k in 1:K) {
        for (j in 1:J) {
            pkj[k,j]=xt[j]*pjk[j,k]/yt[k]
        }
    }
    pjk.complete <- pjk
    if (new_and_exit_voters %in% c("regular", "raw")){
        if (net_entries & max(x[,ncol(x)]/rowSums(x)) < 0.01){
           pjk = pjk[-J,]; eik = eik[-J,]; pkj = pkj[,-J]
        }
        if (net_exits & max(y[,ncol(y)]/rowSums(y)) < 0.01){
           pjk = pjk[,-K]; eik = eik[,-K]; pkj = pkj[-K,]
        }
    }
    HIe = 100*sum(abs(eik))/tt
    pjk = round(100*pjk,2)
    pkj = round(100*pkj,2)
    if (verbose){
        cat("\n\nEstimated Heterogeneity Index HIe:",round(HIe, 2),"%\n")
        cat("\n\n Matrix of vote transitions (in %) from Election 1 to Election 2\n\n")
        print(pjk)
        cat("\n\n Origin (%) of the votes obtained in Election 2\n\n")
        print(pkj)
    }
    return(list("VTM" = pjk, "OTM" = pkj, "EHet" = EHet, "HTEe" = HIe, 
                "inputs" = inputs, "origin" = x, "destination" = y, "VTM.complete" = pjk.complete))
}