library(randomForest)
library(pROC)
library(ade4)
library(vegan)
library(gtools)

# Draw `size` sample ids from `pool`.
#
# Sampling is without replacement only when the pool is strictly larger than
# `size`; otherwise ids are resampled with replacement. The strict comparison
# is deliberate: when training ids are later removed from the same window,
# a bootstrap draw leaves some ids unused for the held-out test pool.
# Indexing through sample.int() avoids sample()'s special case where a
# length-one numeric `pool` would be expanded to 1:pool.
.rf_null_draw_ids <- function(pool, size) {
  if (length(pool) == 0) {
    stop("cannot draw sample ids from an empty pool", call. = FALSE)
  }
  pool[sample.int(length(pool), size, replace = !(length(pool) > size))]
}

# Stack the disease rows on top of the control rows of `data` and return the
# feature table together with the matching "Diseased"/"Control" tags.
# Row names are made unique because ids drawn with replacement repeat.
.rf_null_labelled_set <- function(data, diseaseIds, controlIds) {
  rows <- rbind(
    data[diseaseIds, , drop = FALSE],
    data[controlIds, , drop = FALSE]
  )
  rownames(rows) <- make.names(rownames(rows), unique = TRUE)
  list(
    x = rows,
    tags = c(
      rep("Diseased", length(diseaseIds)),
      rep("Control", length(controlIds))
    )
  )
}

# Score `model` on one disease/control test set.
# Returns a list with AUC, Sensitivity and Specificity.
.rf_null_score <- function(model, data, diseaseIds, controlIds) {
  testSet <- .rf_null_labelled_set(data, diseaseIds, controlIds)
  votes <- predict(model, testSet$x, type = "vote", norm.votes = TRUE)
  calls <- predict(model, testSet$x)
  diseased <- testSet$tags == "Diseased"

  # Fix the comparison direction explicitly. With pROC's default
  # (direction = "auto") the direction is chosen per call from the data, so a
  # model that ranks controls above cases would still report an AUC above 0.5.
  aucValue <- pROC::auc(
    testSet$tags,
    votes[, "Diseased"],
    levels = c("Control", "Diseased"),
    direction = "<"
  )

  list(
    AUC = as.numeric(aucValue),
    Sensitivity = sum(calls[diseased] == "Diseased", na.rm = TRUE) /
      sum(diseased),
    Specificity = sum(calls[!diseased] == "Control", na.rm = TRUE) /
      sum(!diseased)
  )
}

# Shared engine behind the two public entry points below.
#
# For each of `training_iter` rounds a random forest is trained on
# `trainsize` disease and `trainsize` control ids drawn from `trainWindow`.
# Each model is then scored `testing_iter` times on four test sets of
# `testsize` disease + `testsize` control ids:
#   SameAge - drawn from `sameAgeWindow` (minus the training ids when
#             `holdOutTraining` is TRUE),
#   DiffAge - drawn from `diffAgeWindow`,
#   Null1 / Null2 - two independent random mixes of the SameAge and DiffAge
#             ids drawn in that iteration.
# `echoAUC` additionally prints the SameAge, DiffAge and Null1 AUC values.
#
# The RNG is re-seeded with 150 on every call (a global side effect callers
# rely on for repeatable runs).
# NOTE(review): predictions are computed once per test set and the null mix is
# shuffled once; randomForest presumably breaks voting ties at random, so the
# seeded random stream - and therefore the exact numbers - will differ from
# runs made with the earlier, more repetitive implementation. Confirm before
# comparing against stored results.
.rf_null_run <- function(data, trainWindow, sameAgeWindow, diffAgeWindow,
                         disease, control, training_iter, testing_iter,
                         trainsize, testsize, holdOutTraining, echoAUC) {
  set.seed(150)

  groups <- c("SameAge", "DiffAge", "Null1", "Null2")
  metrics <- c("AUC", "Sensitivity", "Specificity")
  results <- list()
  for (group in groups) {
    for (metric in metrics) {
      results[[paste0(metric, group)]] <-
        matrix(NA_real_, training_iter, testing_iter)
    }
  }
  importanceByRun <- matrix(NA_real_, training_iter, ncol(data))

  # Candidate pools that do not change between iterations.
  trainDiseasePool <- intersect(trainWindow, disease)
  trainControlPool <- intersect(trainWindow, control)
  sameDiseasePool <- intersect(sameAgeWindow, disease)
  sameControlPool <- intersect(sameAgeWindow, control)
  diffDiseasePool <- intersect(diffAgeWindow, disease)
  diffControlPool <- intersect(diffAgeWindow, control)

  for (i in seq_len(training_iter)) {
    trainDisease <- .rf_null_draw_ids(trainDiseasePool, trainsize)
    trainControl <- .rf_null_draw_ids(trainControlPool, trainsize)
    trainSet <- .rf_null_labelled_set(data, trainDisease, trainControl)
    tempTrain <- trainSet$x
    TrainTags <- trainSet$tags
    model <- randomForest::randomForest(as.factor(TrainTags) ~ ., tempTrain)

    # Importance of every input column; columns the model does not report
    # are recorded as 0.
    featureHit <- match(colnames(data), rownames(model$importance))
    importanceByRun[i, ] <- ifelse(
      is.na(featureHit), 0, model$importance[featureHit, 1]
    )

    # Same-window test pools for this model.
    if (holdOutTraining) {
      iterDiseasePool <- setdiff(sameDiseasePool, trainDisease)
      iterControlPool <- setdiff(sameControlPool, trainControl)
    } else {
      iterDiseasePool <- sameDiseasePool
      iterControlPool <- sameControlPool
    }

    for (k in seq_len(testing_iter)) {
      sameDisease <- .rf_null_draw_ids(iterDiseasePool, testsize)
      sameControl <- .rf_null_draw_ids(iterControlPool, testsize)
      diffDisease <- .rf_null_draw_ids(diffDiseasePool, testsize)
      diffControl <- .rf_null_draw_ids(diffControlPool, testsize)

      print(i)
      print(k)

      scores <- list()
      scores$SameAge <- .rf_null_score(model, data, sameDisease, sameControl)
      if (echoAUC) {
        print(scores$SameAge$AUC)
      }

      scores$DiffAge <- .rf_null_score(model, data, diffDisease, diffControl)
      if (echoAUC) {
        print(scores$DiffAge$AUC)
      }

      # Null sets: the combined pool holds 2 * testsize ids, so these draws
      # are without replacement.
      scores$Null1 <- .rf_null_score(
        model, data,
        .rf_null_draw_ids(c(sameDisease, diffDisease), testsize),
        .rf_null_draw_ids(c(sameControl, diffControl), testsize)
      )
      if (echoAUC) {
        print(scores$Null1$AUC)
      }

      scores$Null2 <- .rf_null_score(
        model, data,
        .rf_null_draw_ids(c(sameDisease, diffDisease), testsize),
        .rf_null_draw_ids(c(sameControl, diffControl), testsize)
      )

      for (group in groups) {
        for (metric in metrics) {
          results[[paste0(metric, group)]][i, k] <- scores[[group]][[metric]]
        }
      }
    }
  }

  featureProfile <- as.data.frame(importanceByRun)
  colnames(featureProfile) <- colnames(data)
  c(results, list(featureProfile = featureProfile))
}

#' Train on one window, test on held-out samples of the same window, on a
#' second window, and on two random mixes of both.
#'
#' @param data Feature table with sample ids as row names.
#' @param window1 Sample ids used for training; the same-window test ids are
#'   drawn from `window1` after removing the ids used to train that model.
#' @param window2 Sample ids the different-window test sets are drawn from.
#' @param disease Sample ids labelled "Diseased".
#' @param control Sample ids labelled "Control".
#' @param training_iter Number of models to train.
#' @param testing_iter Number of test rounds per model.
#' @param trainsize Number of disease (and of control) ids per training set.
#' @param testsize Number of disease (and of control) ids per test set.
#' @return A list of `training_iter` x `testing_iter` matrices named
#'   AUC/Sensitivity/Specificity + SameAge/DiffAge/Null1/Null2, plus
#'   `featureProfile`, a data frame with one row of feature importances per
#'   trained model.
iterative_rf_multiple_with_null_distribution <- function(data, window1, window2, disease, control, training_iter, testing_iter, trainsize, testsize) {
  .rf_null_run(
    data = data,
    trainWindow = window1,
    sameAgeWindow = window1,
    diffAgeWindow = window2,
    disease = disease,
    control = control,
    training_iter = training_iter,
    testing_iter = testing_iter,
    trainsize = trainsize,
    testsize = testsize,
    holdOutTraining = TRUE,
    echoAUC = TRUE
  )
}

#' Train on one window and test on two separate validation windows and on
#' two random mixes of both.
#'
#' @param data Feature table with sample ids as row names.
#' @param window1 Sample ids used for training.
#' @param window2 Sample ids the "SameAge" test sets are drawn from (training
#'   ids are not removed from this pool).
#' @param window3 Sample ids the "DiffAge" test sets are drawn from.
#' @param disease Sample ids labelled "Diseased".
#' @param control Sample ids labelled "Control".
#' @param training_iter Number of models to train.
#' @param testing_iter Number of test rounds per model.
#' @param trainsize Number of disease (and of control) ids per training set.
#' @param testsize Number of disease (and of control) ids per test set.
#' @return Same structure as `iterative_rf_multiple_with_null_distribution()`.
iterative_rf_multiple_valid_with_null_distribution <- function(data, window1, window2, window3, disease, control, training_iter, testing_iter, trainsize, testsize) {
  .rf_null_run(
    data = data,
    trainWindow = window1,
    sameAgeWindow = window2,
    diffAgeWindow = window3,
    disease = disease,
    control = control,
    training_iter = training_iter,
    testing_iter = testing_iter,
    trainsize = trainsize,
    testsize = testsize,
    holdOutTraining = FALSE,
    echoAUC = FALSE
  )
}



# Country-matched cohorts: for each disease, the row names of every metadata
# entry whose country is one of the countries seen among that disease's
# individuals.
CRCCountryCohort <- rownames(
  combined_short_metadata_2[
    is.element(
      combined_short_metadata_2[["country"]],
      droplevels(combined_short_metadata_2[CRCIndividuals, "country"])
    ),
  ]
)
AdenomaCountryCohort <- rownames(
  combined_short_metadata_2[
    is.element(
      combined_short_metadata_2[["country"]],
      droplevels(combined_short_metadata_2[AdenomaIndividuals, "country"])
    ),
  ]
)
#T2DCountryCohort <- rownames(combined_short_metadata_2[combined_short_metadata_2$country %in% droplevels(combined_short_metadata_2[T2DIndividuals,"country"]),])
CirrhosisCountryCohort <- rownames(
  combined_short_metadata_2[
    is.element(
      combined_short_metadata_2[["country"]],
      droplevels(combined_short_metadata_2[CirrhosisIndividuals, "country"])
    ),
  ]
)
IBDCountryCohort <- rownames(
  combined_short_metadata_2[
    is.element(
      combined_short_metadata_2[["country"]],
      droplevels(combined_short_metadata_2[IBDIndividuals, "country"])
    ),
  ]
)
# The T2D cohort is assembled from the age and region groups directly rather
# than by country matching (the country-based version is kept commented out
# above).
T2DCountryCohort <- c(
  intersect(c(Young, Middle), AsiaIndividuals),
  intersect(Elderly, c(AllEUIndividuals, AsiaIndividuals))
)

# Age-window classification runs. Every call trains on the age group passed
# as `window1` and tests on the remaining age group(s) passed as `window2`,
# both restricted to the disease's country cohort, using 100 training rounds
# with 20 test rounds each.

# IBD
rfIBDPermutedMultipleNullElderly <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Elderly, IBDCountryCohort),
  window2 = intersect(c(Young, Middle), IBDCountryCohort),
  disease = IBDIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 27,
  testsize = 26
)
rfIBDPermutedMultipleNullMiddle <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Middle, IBDCountryCohort),
  window2 = intersect(c(Elderly, Young), IBDCountryCohort),
  disease = IBDIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 27,
  testsize = 26
)
rfIBDPermutedMultipleNullYoung <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Young, IBDCountryCohort),
  window2 = intersect(c(Elderly, Middle), IBDCountryCohort),
  disease = IBDIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 27,
  testsize = 26
)

# Cirrhosis
rfCirrhosisPermutedMultipleNullElderly <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Elderly, CirrhosisCountryCohort),
  window2 = intersect(c(Young, Middle), CirrhosisCountryCohort),
  disease = CirrhosisIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 10,
  testsize = 9
)
rfCirrhosisPermutedMultipleNullMiddle <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Middle, CirrhosisCountryCohort),
  window2 = intersect(c(Elderly, Young), CirrhosisCountryCohort),
  disease = CirrhosisIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 10,
  testsize = 9
)
rfCirrhosisPermutedMultipleNullYoung <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Young, CirrhosisCountryCohort),
  window2 = intersect(c(Elderly, Middle), CirrhosisCountryCohort),
  disease = CirrhosisIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 10,
  testsize = 9
)

# T2D
rfT2DPermutedMultipleNullElderly <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Elderly, T2DCountryCohort),
  window2 = intersect(c(Young, Middle), T2DCountryCohort),
  disease = T2DIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 14,
  testsize = 13
)
rfT2DPermutedMultipleNullMiddle <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Middle, T2DCountryCohort),
  window2 = intersect(c(Elderly, Young), T2DCountryCohort),
  disease = T2DIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 14,
  testsize = 13
)
rfT2DPermutedMultipleNullYoung <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Young, T2DCountryCohort),
  window2 = intersect(c(Elderly, Middle), T2DCountryCohort),
  disease = T2DIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 14,
  testsize = 13
)

# CRC (Young and Middle are pooled into one window)
rfCRCPermutedMultipleNullElderly <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Elderly, CRCCountryCohort),
  window2 = intersect(c(Young, Middle), CRCCountryCohort),
  disease = CRCIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 23,
  testsize = 22
)
rfCRCPermutedMultipleNullYoungMiddle <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(c(Young, Middle), CRCCountryCohort),
  window2 = intersect(Elderly, CRCCountryCohort),
  disease = CRCIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 23,
  testsize = 22
)

# Adenoma (Young and Middle are pooled into one window)
rfAdenomaPermutedMultipleNullElderly <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(Elderly, AdenomaCountryCohort),
  window2 = intersect(c(Young, Middle), AdenomaCountryCohort),
  disease = AdenomaIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 11,
  testsize = 10
)
rfAdenomaPermutedMultipleNullYoungMiddle <- iterative_rf_multiple_with_null_distribution(
  data = combined_species_profile_with_age_country_final[, core_species],
  window1 = intersect(c(Young, Middle), AdenomaCountryCohort),
  window2 = intersect(Elderly, AdenomaCountryCohort),
  disease = AdenomaIndividuals,
  control = SelectControls,
  training_iter = 100,
  testing_iter = 20,
  trainsize = 11,
  testsize = 10
)


#Create Dataframes for plots

# Build the long-format AUC table for one disease.
#
# `runs` is a named list whose names are the age-group labels and whose
# elements are result lists carrying `AUCSameAge` and `AUCDiffAge` matrices
# (one row per trained model). For every age group the per-model median AUC
# is taken across the test rounds, first for SameAge and then for DiffAge.
# The number of rows comes from the matrices themselves, so the table stays
# correct if the number of training rounds used above is changed.
#
# Returns a data frame with columns AUC, ClassificationGroup (factor with
# levels SameAge, DiffAge), AgeGroup and Iter ("Iter1", "Iter2", ...).
build_age_test_aucs <- function(runs) {
  perAgeGroup <- lapply(names(runs), function(ageGroup) {
    sameAge <- apply(runs[[ageGroup]]$AUCSameAge, 1, median)
    diffAge <- apply(runs[[ageGroup]]$AUCDiffAge, 1, median)
    data.frame(
      AUC = c(sameAge, diffAge),
      ClassificationGroup = factor(
        rep(c("SameAge", "DiffAge"), c(length(sameAge), length(diffAge))),
        levels = c("SameAge", "DiffAge")
      ),
      AgeGroup = ageGroup,
      Iter = c(
        paste0("Iter", seq_along(sameAge)),
        paste0("Iter", seq_along(diffAge))
      ),
      stringsAsFactors = FALSE
    )
  })
  do.call(rbind, perAgeGroup)
}

#IBD
rfIBDAgeTestAUCs <- build_age_test_aucs(list(
  Young = rfIBDPermutedMultipleNullYoung,
  Middle = rfIBDPermutedMultipleNullMiddle,
  Elderly = rfIBDPermutedMultipleNullElderly
))
#T2D
rfT2DAgeTestAUCs <- build_age_test_aucs(list(
  Young = rfT2DPermutedMultipleNullYoung,
  Middle = rfT2DPermutedMultipleNullMiddle,
  Elderly = rfT2DPermutedMultipleNullElderly
))
#Cirrhosis
rfCirrhosisAgeTestAUCs <- build_age_test_aucs(list(
  Young = rfCirrhosisPermutedMultipleNullYoung,
  Middle = rfCirrhosisPermutedMultipleNullMiddle,
  Elderly = rfCirrhosisPermutedMultipleNullElderly
))
#CRC
rfCRCAgeTestAUCs <- build_age_test_aucs(list(
  YoungMiddle = rfCRCPermutedMultipleNullYoungMiddle,
  Elderly = rfCRCPermutedMultipleNullElderly
))
#Adenoma
rfAdenomaAgeTestAUCs <- build_age_test_aucs(list(
  YoungMiddle = rfAdenomaPermutedMultipleNullYoungMiddle,
  Elderly = rfAdenomaPermutedMultipleNullElderly
))


