# -*- coding: utf-8 -*-

'''
Function: Computes the scoring function for up to 1000 randomly sampled drug combinations and writes the scores to a result file whose name carries a timestamp, so that every run produces a unique output file. In addition, the first scored group (the benchmark) and every group whose score exceeds it are collected in a second file, which makes it convenient to gather statistics on the better formulas.

！！！
Pay attention to changing the file names of the read files and generated results before running, so as to avoid overwriting the previous results.
！！！

————By Qichao.Yang
'''

import csv
import os
import pandas as pd
import re
import random
from typing import Counter
import itertools
import datetime

# Work inside the project folder so that every relative path used below
# (input spreadsheet, temporary CSV, result files) resolves there.
os.chdir("C:/Users/Desktop/Combination")

# Timestamp of this run, down to the second. It is embedded in the result
# file names so that repeated runs do not overwrite each other.
nowTime = format(datetime.datetime.now(), '%Y-%m-%d-%H-%M-%S')
print(nowTime)

# Benchmark score shared by the whole script; it is replaced by the score of
# the first formula once scoring starts.
scoreStandard = 0

# Load the drug table from the Excel workbook into a DataFrame.
# The columns read below hold the substance name, its target list and its
# homogenization score, which are the inputs of the scoring step.
invoice_data_1 = pd.read_excel('./DrugList.xlsx', header=0)

# Pull the three columns of interest out as plain Python lists.
node_list_key_Ysty_Name = invoice_data_1['Classical Prescriptions'].tolist()
node_list_key_Ysty_Target = invoice_data_1['Checkpoint Targets'].tolist()
node_list_key_Ysty_Score = invoice_data_1['Homogenization Score'].tolist()

header = ['DrugName','Targets','HomogenizationScore']

# Write the usable drugs to a temporary CSV that the scoring step reloads.
# A row is dropped when
#   * its score is missing,
#   * its target cell is missing -- such a cell is read back from the CSV as
#     NaN and makes the later re.split() call on the target field fail with
#     a TypeError, or
#   * its target cell is the placeholder '_'.
with open('./YstyDrugTemp.csv','w',newline='',encoding='utf-8') as temp_file:
    writer = csv.writer(temp_file)
    writer.writerow(header)

    for drug_name, drug_targets, drug_score in zip(
            node_list_key_Ysty_Name,
            node_list_key_Ysty_Target,
            node_list_key_Ysty_Score):
        if pd.isna(drug_score) or pd.isna(drug_targets):
            continue
        if str(drug_targets) == '_':
            continue
        writer.writerow([drug_name, drug_targets, drug_score])
# The with-block has already closed the file; no explicit close() is needed.

# Reload the filtered drug table from the temporary CSV. Its row positions
# are used as the drug identifiers for the rest of the script.
invoice_data_2 = pd.read_csv('./YstyDrugTemp.csv', header=0,encoding='utf-8')

print('Please enter the number of drugs less than '+ str(len(invoice_data_2)) )

# Number of medicinal and edible homologues or classical famous prescription
# substances contained in each formula (int() raises ValueError on
# non-numeric input).
xSelect = input('Please enter the number of drugs to be included in the recipe：')
x = int(xSelect)

# Reject sizes the scoring step cannot handle instead of failing later:
# x == 0 ends in a division by zero, a negative x is refused by
# itertools.combinations, and x larger than the table silently produces
# result files that contain only the header row. x equal to the table size
# is still accepted because it yields exactly one combination.
if not 1 <= x <= len(invoice_data_2):
    raise ValueError(
        'The number of drugs must be between 1 and '
        + str(len(invoice_data_2)) + ', got ' + str(x)
    )

print('Indications: Tumor Immunity.')

# Give each substance involved in the calculation a unique number
# (its row position in invoice_data_2).
numYsty = list(range(len(invoice_data_2)))

# Result header: group number, one column per component, then the score.
header = (
    ['Group Number']
    + ['Component ' + str(position) for position in range(1, x + 1)]
    + ['Group Score']
)

# Export file names, made unique per run by the timestamp,
# e.g. "./ResultList-2024-01-31-12-00-00.csv".
fileName = "./ResultList-" + nowTime + ".csv"
fileName1 = "./betterResultList-" + nowTime + ".csv"

# Score randomly sampled x-drug formulas and write the results:
#   fileName  receives every scored formula;
#   fileName1 receives the first formula (the benchmark) and every later
#             formula whose score is strictly greater than the benchmark.
with open(fileName, 'w', newline='') as all_results:
    writer = csv.writer(all_results)
    writer.writerow(header)

    with open(fileName1, 'w', newline='') as better_results:
        writer1 = csv.writer(better_results)
        writer1.writerow(header)

        drug_total = len(invoice_data_2)
        name_column = invoice_data_2['DrugName']
        target_column = invoice_data_2['Targets']
        score_column = invoice_data_2['HomogenizationScore']

        # The combinations iterator only bounds the number of rounds: at most
        # 1000, and never more than the number of x-combinations of numYsty.
        # The combination it yields is discarded, because each formula is
        # drawn by simple random sampling instead. Only one of the two
        # selection methods can be active per run.
        rounds = itertools.islice(itertools.combinations(numYsty, x), 1000)
        for listNum, _ in enumerate(rounds, start=1):
            # Draw x distinct row numbers and echo them to the console.
            random_num = random.sample(range(drug_total), x)
            print(random_num)

            # Name, raw target field and homogenization score of each drug.
            drug_names = [name_column[k] for k in random_num]
            target_fields = [target_column[k] for k in random_num]
            heat_scores = [score_column[k] for k in random_num]

            # Split every target field on runs of ';' and pool all targets.
            # NOTE(review): a leading or trailing ';' yields an empty token
            # that is counted as a target -- confirm this is intended.
            targets_per_drug = [
                re.split(r'[\;]+', field) for field in target_fields
            ]
            pooled_targets = [
                target for group in targets_per_drug for target in group
            ]

            # t: number of distinct targets in the formula.
            # g: number of targets that occur at least twice in the pool.
            occurrences = Counter(pooled_targets)
            t = len(occurrences)
            g = sum(1 for hits in occurrences.values() if hits >= 2)

            rx = 1              # The current value is 1.
            dx = 1 - g / t      # share of targets that are not repeated

            # Each drug contributes the Euclidean norm of (z1, z2, z3):
            #   z1 = 10 * (targets of this drug) / t
            #   z2 = 1 / x
            #   z3 = integer part of the homogenization score
            # NOTE(review): int() truncates fractional scores -- confirm the
            # scores are meant to be whole numbers.
            z2 = 1 / x
            zTemp = 0
            for drug_targets, raw_score in zip(targets_per_drug, heat_scores):
                z1 = 10 * len(drug_targets) / t
                z3 = int(raw_score)
                zTemp = zTemp + (z1**2 + z2**2 + z3**2) ** 0.5

            Score = zTemp * dx * rx

            # Output row: group number, component names, group score.
            row = [listNum] + drug_names + [Score]

            if listNum == 1:
                # The first formula defines the benchmark and is always kept
                # in the "better" file.
                scoreStandard = Score
                print('The benchmark score is ' + str(scoreStandard))
                writer1.writerow(row)
            elif Score > scoreStandard:
                writer1.writerow(row)

            writer.writerow(row)
