######################################################################
######################################################################
######################################################################
######################################################################
######################################################################
######################################################################
# by S. Hashimoto
# Sep. 2016
#
# Create a directory ./out
# For Windows, brt.functions.R
# For Mac, brt.functions.mac.R
# For a test run, set a small value for mts (line 230-231)
#
# BRT analysis includes a random or probabilistic component; therefore, the results will be subtly different each time the codes are run.
#
# For factors (landcover and texture), when the data for an id are blank (no data), the BRT analysis moves over the id number in fitted functions (e.g., when id 4 is blank, the data of id 5 become id 4 in the output in ***_relation.dat. Therefore, please see the later columns in ***_relation.dat for landcover and texture factors.
#
# Owing to the terms of use of the CMIP5 data, the data of the CMIP5 are not attached here; however, the same analysis can be applied when the CMIP5 data are attached to the data provided here.
#
######################################################################
######################################################################
######################################################################
do_BRT <- function(infilename, ds, gy, my_lr, my_tc, my_bf, my_mts){
    #####################################
    #####################################
    library(data.table)
    library(gbm)

    #For Windows, brt.functions.win.R
    #For Mac, brt.functions.mac.R
    source("brt.functions.mac.R")

    #####################################
    gdata0 <- fread(infilename)
    class(gdata0) <-"data.frame"
    gdata <<- subset(gdata0, gdata0[,gy]>=0.0)
    vname<-names(gdata)
    nr<-nrow(gdata)

    #####################################
    #11: Landcover, 14: Texture
    gdata[,11]<<-as.factor(gdata[,11])
    gdata[,14]<<-as.factor(gdata[,14])

    #####################################
    #1:id, 2: lon, 3: lat, and were not used for the analysis.
    gx<-c(4,5,6,7,8,9,10,11,12,13,14,15,16)
    soc<-gdata[,gy]

    #####################################
    cat("\n")
    cat("**********************************************************","\n")
    cat("**********************************************************","\n")
    cat("**********************************************************","\n")
    cat("SOC DB: ",vname[gy],"\n")
    cat("Variables:", vname[4:16],"\n")
    cat("**********************************************************","\n")

    #####################################
    #output file names
    #Default output from BRT code, figures of fitted function
    outfilename_fig_Func<-paste("./out/output_ds",ds,"_",vname[gy],"_Func.pdf", sep="")
    #Default output from BRT code, figures of contribution
    outfilename_fig_RI<-paste("./out/output_ds",ds,"_",vname[gy],"_RI.png", sep="")
    #Basic output 1 (contributions)
    outfilename<-paste("./out/output_ds",ds,"_",vname[gy],"_stat.dat", sep="")
    #Basic output 2 (Fitted functions)
    outfilename_relation<-paste("./out/output_ds",ds,"_",vname[gy],"_relation.dat", sep="")
    #To check the performance
    outfilename_pred<-paste("./out/output_ds",ds,"_",vname[gy],"_pred.dat", sep="")
    #To check the performance
    outfilename_pred_stat<-paste("./out/output_ds",ds,"_",vname[gy],"_pred_stat.dat", sep="")

    ######################################################################
    #BRT analysis
    ######################################################################
    result<-gbm.step(data=gdata, gbm.x=gx, gbm.y=gy,family="gaussian", tree.complexity=my_tc,learning.rate=my_lr, max.trees=my_mts,bag.fraction=my_bf, plot.main=FALSE, plot.folds=FALSE)
    #print("OK")
    gbm.plot.my(result, n.plots=9, plot.layout=c(3,3), write.title=F)
    dev.copy2pdf(file=outfilename_fig_Func)
    dev.off()
    #print("OK")
    preds<-result$fitted

    ######################################################################
    # Prediction and Statistics
    ######################################################################
    ndata<-length(soc)
    trmse<-sum((soc-preds)^2)
    rmse<-(trmse/ndata)^0.5

    #####################################
    s<-lm(soc~preds)
    ss<-summary(s)
    f.stat<-ss$fstatistic
    p.value<-1-pf(f.stat["value"],f.stat["numdf"],f.stat["dendf"])
    r2val<-ss$r.squared
    r2valadj<-ss$adj.r.squared

    #####################################
    cat(sprintf("Target: %s\n\nUsed inputs:\n", vname[gy]), file=outfilename, append=F)
    for(i in 1:length(gx))
    {
        cat(sprintf("%s\n", vname[gx[i]]), file=outfilename, append=T)
    }
    cat(sprintf("\nStatistics:\n"), file=outfilename, append=T)

    cat(sprintf("tree complexity=%d\nlearning rate=%f\nmaximum trees=%d\nbag fraction=%f\n\n",
    my_tc,
    my_lr,
    result$gbm.call$n.tree,
    my_bf), file=outfilename, append=T)


    cat(sprintf("coef(s)[2] coef(s)[1] p.value r2val r2valadj rmse ndata\n"), file=outfilename_pred_stat, append=F)

    cat(sprintf("%.5f %f %f %f %f %f %d\n\n",
    coef(s)[2],
    coef(s)[1],
    p.value,
    r2val,
    r2valadj,
    rmse,
    ndata), file=outfilename_pred_stat, append=T)


    png(outfilename_fig_RI)
    s<-summary(result, plotit=TRUE)
    dev.off()
    cat(sprintf("\nRIs:\n"), file=outfilename, append=T)
    n<-length(gx)
    for(i in 1:n)
    {
        cat(sprintf("%s %f\n", s[1]$var[i], s[2]$rel.inf[i]), file=outfilename, append=T)
    }
    cat(sprintf("\n"), file=outfilename, append=T)
    cat(sprintf("cv.statistics.correlation.mean %.4f %.4f %d %d %s\n", my_lr, my_bf, my_tc, my_mts, result$cv.statistics$correlation.mean), file=outfilename, append=T)

    #result$cv.statistics$correlation.mean
    #sink(outfilename, append=T)
    #print(s)
    #sink()

    #####################################
    cat(sprintf("id obs modeled\n"), file=outfilename_pred, append=F)
    for(i in 1:nr)
    {
        cat(sprintf("%d %f %f\n",
        gdata[i,1],
        gdata[i,gy],
        preds[i]), file=outfilename_pred, append=T)
        #data_pred[gdata[i,1]]=preds[i]
    }

    ######################################################################
    # Print relationships
    ######################################################################
    cat(sprintf(""), file=outfilename_relation, append=F)

    val<-list(NULL)
    n<-length(gx)
    for(i in 1:n)
    {
        val[[i]]<-plot.gbm(result, i.var=i, n.trees=result$gbm.call$n.tree, return.grid=TRUE, continuous.resolution=1000)
        valname<-names(val[[i]])
        cat(sprintf("%s %s ",valname[1], valname[2]), file=outfilename_relation, append=T)
    }
        cat(sprintf("landcover_id landcover count y "), file=outfilename_relation, append=T)
        cat(sprintf("texture_id texture count y\n"), file=outfilename_relation, append=T)

    for(j in 1:1000)
    {
        for(i in 1:n)
        {
            #val<-plot.gbm(result, i.var=i, ntrees=result$gbm.call$n.tree, return.grid=TRUE, continuous.resolution=1000)
            mn<-length(val[[i]][,1])
            if(j<=mn)
            {
                cat(sprintf("%f %f ",val[[i]][j,1], val[[i]][j,2]-mean(val[[i]][,2])), file=outfilename_relation, append=T)
            }
            else
            {
                cat(sprintf("NA NA "), file=outfilename_relation, append=T)
            }
        }
        #####################################
        #Factor (modislandcover)
        i<-8
        mn<-length(val[[i]][,1])
        lvs<-levels(gdata[,(i+3)])
        tbl<-table(gdata[,i+3])
        if(j<=mn)
        {
            cat(sprintf("%.0f %s %.0f %f ",val[[i]][j,1],lvs[j], tbl[j], val[[i]][j,2]-mean(val[[i]][,2])), file=outfilename_relation, append=T)
        }
        else
        {
            cat(sprintf("NA NA NA NA "), file=outfilename_relation, append=T)
        }
        #####################################
        #Factor (texture)
        i<-11
        mn<-length(val[[i]][,1])
        lvs<-levels(gdata[,i+3])
        tbl<-table(gdata[,i+3])
        if(j<=mn)
        {
            cat(sprintf("%.0f %s %.0f %f ",val[[i]][j,1], lvs[j], tbl[j], val[[i]][j,2]-mean(val[[i]][,2])), file=outfilename_relation, append=T)
        }
        else
        {
            cat(sprintf("NA NA NA NA "), file=outfilename_relation, append=T)
        }

        cat(sprintf("\n"), file=outfilename_relation, append=T)

    }
}
######################################################################
######################################################################
######################################################################
######################################################################
######################################################################
######################################################################
cat("**********************************************************","\n")
cat("**********************************************************","\n")
cat("**********************************************************","\n")
#Maximum number of trees
#mts<-15000
mts<-150 # For test run

###################################
###################################
infile<-"./dataset_global.dat"
#dataset id 1: global
dds<-1

###################################
# HWSD
ggy<-17 #Column number
lr<-0.05
tc<-5
bf<-0.7
do_BRT(infile, dds, ggy, lr, tc, bf, mts)

###################################
# IGBPDIS
ggy<-18 #Column number
lr<-0.05
tc<-5
bf<-0.7
do_BRT(infile, dds, ggy, lr, tc, bf, mts)

###################################
###################################
infile<-"./dataset_north.dat"
#dataset id 4: north
dds<-4

###################################
# HWSD
ggy<-17 #Column number
lr<-0.05
tc<-5
bf<-0.7
do_BRT(infile, dds, ggy, lr, tc, bf, mts)

###################################
# IGBPDIS
ggy<-18 #Column number
lr<-0.05
tc<-5
bf<-0.5
do_BRT(infile, dds, ggy, lr, tc, bf, mts)

###################################
# NCSCD
ggy<-19 #Column number
lr<-0.005
tc<-5
bf<-0.7
do_BRT(infile, dds, ggy, lr, tc, bf, mts)

cat("**********************************************************","\n")
cat("**********************************************************","\n")
cat("**********************************************************","\n")
######################################################################
######################################################################
######################################################################
