Initial commit

975d020a · wangys_biolab · 975d020a · 975d020a · 975d020a · 975d020a
Commit 975d020a authored Feb 14, 2022 by wangys_biolab
91 changed files
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/.idea/codes.iml
+++ b/.idea/codes.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/csv-plugin.xml
+++ b/.idea/csv-plugin.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CsvFileAttributes">
+    <option name="attributeMap">
+      <map>
+        <entry key="D:\Wangys_data_postdoc\Effector_predictions\GAN_model\6.feature_selection\feature_selection_model.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="D:\Wangys_data_postdoc\Effector_predictions\fungi_effector_DRLF\eFeature\src\SSA_embedding.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\Effector_GAN.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\embedding_model\get_feature.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\embedding_model\ifeature.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\embedding_model\src\SSA_embedding.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+      </map>
+    </option>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/codes.iml" filepath="$PROJECT_DIR$/.idea/codes.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/Effector_GAN.py
+++ b/Effector_GAN.py
+import joblib
+import pandas as pd
+from embedding_model.get_feature import fasta,GnerateFeatures
+import argparse
+import time
+
+def predict_model(inputfasta,outfile):
+    """Predict fungal effector probabilities for a FASTA file and save them as CSV.
+
+    Features are produced by ``GnerateFeatures``, scored with the pickled
+    classifier stored in ``./mlp_model/mlp_gan_selection1752.pkl`` and written
+    to ``outfile`` with the columns ``Sequence_ID``,
+    ``Fungal effector protein possibility`` and ``Classification``.
+
+    Args:
+        inputfasta: path to the input sequences in FASTA format.
+        outfile: path of the CSV file to write.
+    """
+    seq_dict, id_list, seq_list = fasta(inputfasta)
+    feature_sel = GnerateFeatures(id_list, seq_dict, inputfasta)
+    T0 = time.time()
+    print("Predicting...")
+    # NOTE(review): joblib.load unpickles arbitrary objects - only load trusted model files.
+    # The context manager closes the model file, which the bare open() call never did.
+    with open('./mlp_model/mlp_gan_selection1752.pkl', 'rb') as model_file:
+        mlp_model_1752 = joblib.load(model_file)
+    # Column 1 of predict_proba is the probability used for the "Fungal effector" call.
+    effector_prob = [float(p) for p in mlp_model_1752.predict_proba(feature_sel)[:, 1]]
+
+    result2 = pd.DataFrame({
+        "Sequence_ID": [str(seq_id) for seq_id in id_list],
+        # Stored as text rounded to 3 decimals, exactly as it appears in the CSV.
+        "Fungal effector protein possibility": [str(round(p, 3)) for p in effector_prob],
+        "Classification": ["Fungal effector" if p >= 0.5 else "non-effector" for p in effector_prob],
+    })
+    result2.to_csv(outfile)
+    print(" ")
+    print("Effector-GAN completed !!!!!")
+    print("===============================")
+    print(" ")
+    print("it took %0.3f mins.\n" % ((time.time() - T0) / 60))
+
+if __name__=="__main__":
+    # Command-line entry point: read the FASTA path and the output CSV path, then predict.
+    # The text must be passed as ``description``; the first positional parameter of
+    # ArgumentParser is ``prog`` (the program name shown in the usage line).
+    parser = argparse.ArgumentParser(
+        description='Script for predicting fungal effector proteins using deep representation learning features '
+        'and generative adversarial network')
+
+    # Both paths are mandatory: without them predict_model would receive None.
+    parser.add_argument('-i', type=str, required=True, help='input sequences in Fasta format')
+    parser.add_argument('-o', type=str, required=True, help='path to saved CSV file')
+
+    args = parser.parse_args()
+    inputfasta = args.i
+    outfile = args.o
+    predict_model(inputfasta, outfile)
+
--- a/TSNE/.idea/.gitignore
+++ b/TSNE/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/TSNE/.idea/TSNE.iml
+++ b/TSNE/.idea/TSNE.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
--- a/TSNE/.idea/csv-plugin.xml
+++ b/TSNE/.idea/csv-plugin.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CsvFileAttributes">
+    <option name="attributeMap">
+      <map>
+        <entry key="\figure_tsne.py">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+      </map>
+    </option>
+  </component>
+</project>
\ No newline at end of file
--- a/TSNE/.idea/inspectionProfiles/profiles_settings.xml
+++ b/TSNE/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/TSNE/.idea/misc.xml
+++ b/TSNE/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/TSNE/.idea/modules.xml
+++ b/TSNE/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/TSNE.iml" filepath="$PROJECT_DIR$/.idea/TSNE.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/TSNE/figure_tsne.py
+++ b/TSNE/figure_tsne.py
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import manifold
+
+fake_positive_data = "Iteration_0_Synthetic_Training_Positive.txt"
+real_positive_data = "train_positive_merged_feature.csv"
+
+path = "./real_fake_pos/"
+# Load the real positive feature matrix (the first CSV column is the row index).
+datasetOriginal_Positive1 = pd.read_csv(path+real_positive_data,index_col=0, header=0)
+datasetOriginal_Positive = np.array(datasetOriginal_Positive1, dtype='float')
+
+# Load the synthetic samples and drop the last parsed column.
+# NOTE(review): presumably the extra column comes from the trailing comma that
+# WGAN.py writes at the end of every line - confirm against the data file.
+fakedataset_positive0 = pd.read_csv(path+fake_positive_data, index_col=None,header=None,sep=",")
+fakedataset_positive1 = np.array(fakedataset_positive0, dtype='float')
+fakedataset_positive = np.delete(fakedataset_positive1,-1, axis = 1)
+
+# Stack real rows first, synthetic rows second.
+realFakeFeatures = np.vstack((datasetOriginal_Positive, fakedataset_positive))
+
+# Labels: 1 = real sample, 0 = synthetic sample.
+labelArray = np.concatenate((
+    np.ones(len(datasetOriginal_Positive), dtype=int),
+    np.zeros(len(fakedataset_positive), dtype=int),
+))
+
+# 2-D t-SNE embedding of the combined feature matrix.
+tsne = manifold.TSNE(n_components=2, random_state=500)
+realFake_tsne = tsne.fit_transform(realFakeFeatures)
+
+plt.figure(figsize=(9, 6))
+# One scatter call per class instead of one per point: same picture, far fewer
+# artists, and both legend handles exist even if a class is empty.
+# Real samples are drawn first so the synthetic ones stay on top, as before.
+real_mask = labelArray == 1
+fake_mask = labelArray == 0
+s2 = plt.scatter(realFake_tsne[real_mask, 0], realFake_tsne[real_mask, 1], s=100, lw=3, color='g', marker='^')
+s1 = plt.scatter(realFake_tsne[fake_mask, 0], realFake_tsne[fake_mask, 1], s=50, lw=3, color='r')
+
+plt.rcParams['font.sans-serif']=['Times New Roman']
+
+plt.xticks(size=15,weight='bold')
+plt.yticks(size=15,weight='bold')
+plt.xlabel('x-tsne',fontdict={'family' : 'Times New Roman', 'size' : 20},weight='bold')
+
+plt.ylabel('y-tsne',fontdict={'family' : 'Times New Roman', 'size' : 20},weight='bold')
+
+plt.title("Syn.pos. +real pos.(Iteration = 0)",fontdict={'family' : 'Times New Roman', 'size' : 20},weight='bold')
+# Real samples are green triangles, synthetic samples are red dots.
+plt.rcParams.update({'font.size': 15})
+plt.rcParams["font.weight"] = "bold"
+plt.legend((s1,s2),('Synthetic positive protein samples','Real positive protein samples') ,loc = 'best')
+bwith = 2  # line width of the axes border
+ax = plt.gca()  # current axes
+# Thicken all four spines (the border of the plot area).
+for side in ('top', 'right', 'bottom', 'left'):
+    ax.spines[side].set_linewidth(bwith)
+
+plt.savefig("pos_tsne_iter0.pdf", dpi=600,format="pdf")
+
+plt.show()
+
+
+
+
+
--- a/TSNE/pos_tsne_iter0.pdf
+++ b/TSNE/pos_tsne_iter0.pdf
--- a/TSNE/real_fake_pos/Iteration_0_Synthetic_Training_Positive.txt
+++ b/TSNE/real_fake_pos/Iteration_0_Synthetic_Training_Positive.txt
--- a/TSNE/real_fake_pos/train_positive_merged_feature.csv
+++ b/TSNE/real_fake_pos/train_positive_merged_feature.csv
--- a/WGAN/CTST2.py
+++ b/WGAN/CTST2.py
+
+#-----Python implementation of an approach to conduct the CTST on synthetic and real testing protein feature samples.
+#-----Please download data via http://bioinfadmin.cs.ucl.ac.uk/downloads/FFPredGAN/TestingData/.
+
+import numpy as np
+import math
+import glob
+import random
+import torch
+import torch.autograd as autograd
+import torch.nn as nn
+from sklearn.model_selection import LeaveOneOut
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import accuracy_score,recall_score
+import os
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import cross_val_score
+from sklearn.metrics import accuracy_score, precision_score, recall_score,f1_score,\
+    matthews_corrcoef,confusion_matrix,classification_report,roc_curve,auc
+
+
+class Generator(nn.Module):
+    """Fully connected generator: 258 -> 512 -> 512 -> 512 -> 258.
+
+    The first two hidden layers use ReLU, the third uses Tanh, and the output
+    layer is a plain linear projection.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        layers = [
+            nn.Linear(258, 512),
+            nn.ReLU(True),
+            nn.Linear(512, 512),
+            nn.ReLU(True),
+            nn.Linear(512, 512),
+            nn.Tanh(),
+            nn.Linear(512, 258),
+        ]
+        # Registered as ``main`` so the parameter names stay ``main.<index>.*``.
+        self.main = nn.Sequential(*layers)
+
+    def forward(self, noise):
+        """Return ``self.main`` applied to ``noise`` (last dimension must be 258)."""
+        return self.main(noise)
+
+ITERS = 100000  # not used by this script
+real_positive_data = "train_positive_merged_feature.csv"
+
+accuracy_file = "CTST_synthetic_accuracy.csv"
+
+# Collect the synthetic feature files to evaluate.
+path = "./synthetic/"  # directory holding the synthetic sample files
+file_syn = [name for name in os.listdir(path) if name.endswith("Synthetic_Training_Positive.txt")]
+
+# Load the real feature samples (the first CSV column is the row index).
+datasetOriginal_Positive = np.array(pd.read_csv(real_positive_data, index_col=0, header=0), dtype='float')
+
+# The context manager guarantees the results are flushed and the file is closed,
+# even when one of the evaluations raises.
+with open(accuracy_file, "w") as outfile:
+    outfile.write("Iteration,accuracy,recall,precision,f1\n")
+
+    for syn_name in file_syn:
+        # Load one synthetic sample file and drop the last parsed column.
+        # NOTE(review): presumably that column is the empty field produced by the
+        # trailing comma WGAN.py writes on every line - confirm.
+        fake_table = pd.read_csv(path + syn_name, sep=",", index_col=None, header=None)
+        fakedataset_positive = np.delete(np.array(fake_table, dtype='float'), -1, axis=1)
+        # File names look like "Iteration_<n>_Synthetic_Training_Positive.txt".
+        Iteration = syn_name.split("_")[1]
+
+        # Labels: real samples are 1, synthetic samples are 0.
+        labelArray = np.asarray([1] * len(datasetOriginal_Positive) + [0] * len(fakedataset_positive))
+        # Real rows first, synthetic rows second (same order as the labels).
+        realFakeFeatures = np.vstack((datasetOriginal_Positive, fakedataset_positive))
+
+        # Leave-one-out 1-nearest-neighbour classification of real vs synthetic.
+        prediction_list = []
+        real_list = []
+        for train_index, test_index in LeaveOneOut().split(realFakeFeatures):
+            knn = KNeighborsClassifier(n_neighbors=1).fit(realFakeFeatures[train_index], labelArray[train_index])
+            # Each split holds out exactly one sample; store plain scalars so the
+            # metric functions receive 1-D label vectors.
+            prediction_list.append(knn.predict(realFakeFeatures[test_index])[0])
+            real_list.append(labelArray[test_index][0])
+
+        accuracy = accuracy_score(real_list, prediction_list)
+        print(Iteration,accuracy)
+        recall = recall_score(real_list, prediction_list, average='weighted')
+        # NOTE(review): precision uses the default binary averaging while recall
+        # and f1 are weighted - confirm this mix is intended.
+        precision = precision_score(real_list, prediction_list)
+        f1 = f1_score(real_list, prediction_list, average='weighted')
+
+        outfile.write('%s,%s,%s,%s,%s\n' % (Iteration, accuracy, recall, precision, f1))
+
+
+
+
+
--- a/WGAN/WGAN.py
+++ b/WGAN/WGAN.py
+# coding=utf-8
+import os, sys
+sys.path.append(os.getcwd())
+import numpy as np
+import torch
+import torch.autograd as autograd
+import torch.nn as nn
+import torch.optim as optim
+torch.manual_seed(1)
+import pandas as pd
+
+
+
+
+input_file = "train_positive_merged_feature.csv"
+# Real positive feature table; the first CSV column is used as the row index.
+positive_real_feature = pd.read_csv(input_file,index_col=0, header=0)
+
+# Hyper-parameters
+feature_len = 5797  # width of the Generator/Discriminator input layers
+GDIM = 512  # hidden width of the Generator
+DDIM = 86  # hidden width of the Discriminator
+FIXED_GENERATOR = True  # when True, Generator.forward returns noise + real_data and the generator is not trained
+LAMBDA = .1  # weight of the gradient penalty
+CRITIC_ITERS = 5  # discriminator updates per outer iteration
+BATCH_SIZE = len(positive_real_feature)  # full batch: one row per real sample
+ITERS = 100000  # number of outer training iterations
+use_cuda = False  # move networks and tensors to the GPU when True
+
+# ###### Generator #####
+class Generator(nn.Module):
+    """Generator network: feature_len -> GDIM -> GDIM -> GDIM -> feature_len.
+
+    When the module-level flag ``FIXED_GENERATOR`` is true the network is
+    bypassed and ``forward`` simply returns ``noise + real_data``.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.main = nn.Sequential(
+            nn.Linear(feature_len, GDIM),  # feature_len inputs -> GDIM hidden units
+            nn.ReLU(True),
+            nn.Linear(GDIM, GDIM),
+            nn.ReLU(True),
+            nn.Linear(GDIM, GDIM),
+            nn.Tanh(),  # squashes the hidden activations to [-1, 1]; the final Linear is unbounded
+            nn.Linear(GDIM, feature_len),
+        )
+
+    def forward(self, noise, real_data):
+        """Return synthetic samples for ``noise`` (``real_data`` is only used in fixed mode)."""
+        if not FIXED_GENERATOR:
+            return self.main(noise)
+        # Fixed mode: the real samples perturbed by the noise.
+        return noise + real_data
+
+# Discriminator (critic): a small multi-layer perceptron
+class Discriminator(nn.Module):
+    """Critic network: feature_len -> DDIM -> DDIM -> DDIM -> 1 with LeakyReLU activations."""
+
+    def __init__(self):
+        super().__init__()
+
+        self.fc1 = nn.Linear(feature_len, DDIM)
+        # A single stateless LeakyReLU is shared by all hidden layers.
+        self.relu = nn.LeakyReLU()
+        self.fc2 = nn.Linear(DDIM, DDIM)
+        self.fc3 = nn.Linear(DDIM, DDIM)
+        self.fc4 = nn.Linear(DDIM, 1)
+
+    def forward(self, inputs):
+        """Score ``inputs`` and expose the hidden activations.
+
+        Returns:
+            A 4-tuple ``(scores, hidden1, hidden2, hidden3)`` where ``scores`` is
+            the flattened output of the last layer and ``hiddenK`` is the
+            activation after the K-th hidden layer.
+        """
+        # Each layer is evaluated once and reused; the values are identical to
+        # recomputing the whole chain for every hidden output.
+        hidden1 = self.relu(self.fc1(inputs))
+        hidden2 = self.relu(self.fc2(hidden1))
+        hidden3 = self.relu(self.fc3(hidden2))
+        out = self.fc4(hidden3)
+
+        return out.view(-1), hidden1, hidden2, hidden3
+
+def weights_init(m):
+    """Initialise one module in place; intended for ``Module.apply``.
+
+    Modules whose class name contains ``Linear`` get weights drawn from
+    N(0.0, 0.02); those containing ``BatchNorm`` get weights from N(1.0, 0.02).
+    In both cases the bias is set to zero. Any other module is left untouched.
+    """
+    classname = type(m).__name__
+    if 'Linear' in classname:
+        weight_mean = 0.0
+    elif 'BatchNorm' in classname:
+        weight_mean = 1.0
+    else:
+        return
+    m.weight.data.normal_(weight_mean, 0.02)
+    m.bias.data.fill_(0)
+
+
+def inf_train_gen():
+    """Load the table stored in ``input_file`` and return it as a float32 array.
+
+    The first CSV column is read as the index and the first row as the header,
+    so neither appears in the returned values.
+    """
+    return np.array(pd.read_csv(input_file, index_col=0, header=0), dtype='float32')
+
+
+def calc_gradient_penalty(netD, real_data, fake_data):
+    """Return the gradient penalty term, already multiplied by ``LAMBDA``.
+
+    The critic is evaluated on random per-sample interpolations between
+    ``real_data`` and ``fake_data``; the penalty is the mean squared deviation
+    of the per-sample gradient L2 norm from 1.
+
+    Args:
+        netD: critic whose call returns a tuple with the scores as first item.
+        real_data: 2-D tensor of real samples (one row per sample).
+        fake_data: tensor of synthetic samples with the same shape.
+    """
+    # One mixing coefficient per sample. The batch size is taken from the data
+    # itself so the function also works when it differs from BATCH_SIZE.
+    alpha = torch.rand(real_data.size(0), 1)
+    alpha = alpha.expand(real_data.size()).to(real_data.device)
+
+    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
+    # Detach so the interpolation is a leaf tensor we can differentiate against.
+    interpolates = interpolates.detach().requires_grad_(True)
+
+    disc_interpolates = netD(interpolates)[0]
+
+    # create_graph=True keeps the penalty differentiable w.r.t. the critic weights.
+    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
+                              grad_outputs=torch.ones_like(disc_interpolates),
+                              create_graph=True, retain_graph=True, only_inputs=True)[0]
+
+    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
+    return gradient_penalty
+
+netG = Generator()
+netD = Discriminator()
+netD.apply(weights_init)
+netG.apply(weights_init)
+
+
+if use_cuda:
+    netD = netD.cuda()
+    netG = netG.cuda()
+
+# NOTE(review): lr=1e-8 is an unusually small step size - confirm it is intended.
+optimizerD = optim.Adam(netD.parameters(), lr=1e-8, betas=(0.5, 0.9))
+optimizerG = optim.Adam(netG.parameters(), lr=1e-8, betas=(0.5, 0.9))
+
+# The training table never changes, so it is loaded once instead of re-reading
+# the CSV on every iteration.
+data = inf_train_gen()
+real_data = torch.FloatTensor(data)
+if use_cuda:
+    real_data = real_data.cuda()
+
+# ########################## Training loop #####################
+
+for iteration in range(ITERS):
+    print(iteration)
+    for p in netD.parameters():
+        p.requires_grad = True
+
+    # Synthetic samples of this iteration; written to disk every 200 iterations below.
+    noise = torch.randn(BATCH_SIZE, feature_len)
+    if use_cuda:
+        noise = noise.cuda()
+    with torch.no_grad():
+        fake = netG(noise, real_data)
+    fake_output = fake.cpu().numpy()
+
+    # ---- critic (discriminator) updates ----
+    for iter_d in range(CRITIC_ITERS):
+
+        netD.zero_grad()
+
+        D_real = netD(real_data)[0].mean()
+
+        noise = torch.randn(BATCH_SIZE, feature_len)
+        if use_cuda:
+            noise = noise.cuda()
+        # The generator is not updated here, so no graph is needed for the fakes.
+        with torch.no_grad():
+            fake = netG(noise, real_data)
+
+        D_fake = netD(fake)[0].mean()
+
+        gradient_penalty = calc_gradient_penalty(netD, real_data, fake)
+
+        # Back-propagate the critic loss itself. Calling backward() on D_real and
+        # D_fake separately accumulated the gradient of D_real + D_fake, i.e. the
+        # real-sample term entered with the wrong sign.
+        D_cost = D_fake - D_real + gradient_penalty
+        D_cost.backward()
+        optimizerD.step()
+
+    if iteration % 200 == 0:
+        # One sample per line, every value followed by a comma (trailing comma kept
+        # because the downstream scripts drop the resulting empty last column).
+        with open("./Iteration_" + str(iteration) + "_Synthetic_Training_Positive.txt", "w") as fake_writer:
+            for row in fake_output:
+                fake_writer.write("".join(str(value) + "," for value in row) + "\n")
+
+    # ---- generator update (skipped while the generator is fixed) ----
+    if not FIXED_GENERATOR:
+
+        for p in netD.parameters():
+            p.requires_grad = False
+        netG.zero_grad()
+
+        noise = torch.randn(BATCH_SIZE, feature_len)
+        if use_cuda:
+            noise = noise.cuda()
+        fake = netG(noise, real_data)
+        G = netD(fake)[0].mean()
+        # The generator minimises -D(fake); back-propagate that loss, not +D(fake).
+        G_cost = -G
+        G_cost.backward()
+        optimizerG.step()
+
+# Save the trained networks
+torch.save(netG.state_dict(), './generator.pth')
+torch.save(netD.state_dict(), './discriminator.pth')
+
--- a/WGAN/train_positive_merged_feature.csv
+++ b/WGAN/train_positive_merged_feature.csv
--- a/embedding_model/__pycache__/get_feature.cpython-37.pyc
+++ b/embedding_model/__pycache__/get_feature.cpython-37.pyc
--- a/embedding_model/__pycache__/get_feature.cpython-38.pyc
+++ b/embedding_model/__pycache__/get_feature.cpython-38.pyc
--- a/embedding_model/__pycache__/ifeature.cpython-37.pyc
+++ b/embedding_model/__pycache__/ifeature.cpython-37.pyc
--- a/embedding_model/effector_test_positive_allfeature4.csv
+++ b/embedding_model/effector_test_positive_allfeature4.csv
--- a/embedding_model/embbed_models/SSA_embed.model
+++ b/embedding_model/embbed_models/SSA_embed.model
--- a/embedding_model/feature_id.csv
+++ b/embedding_model/feature_id.csv
--- a/embedding_model/get_feature.py
+++ b/embedding_model/get_feature.py
+from __future__ import print_function,division
+import sys
+sys.path.append('./embedding_model/')
+from src.SSA_embedding import SSA_Embed,BiLSTM_Embed
+from src.UniRep_emb import UniRep_Embed
+from ifeature import extract_feature,bind_feature
+import pandas as pd
+import time
+from Bio import SeqIO
+
+def fasta(data):
+    """Parse a FASTA file into lookup structures.
+
+    Args:
+        data: FASTA source handed to ``SeqIO.parse``.
+
+    Returns:
+        ``(seq_dict, id_list, seq_list)``: a dict mapping record id to sequence
+        (a repeated id keeps the last sequence), plus the ids and the sequences
+        in file order.
+    """
+    records = list(SeqIO.parse(data, "fasta"))
+    id_list = [record.id for record in records]
+    seq_list = [record.seq for record in records]
+    seq_dict = dict(zip(id_list, seq_list))
+    return seq_dict, id_list, seq_list
+
+def Gnerate_iFeatures(id_list,data):
+    """Run the iFeature extraction for ``data`` and return the merged table.
+
+    The per-descriptor files are written below
+    ``embedding_model/ifeature_vectors/`` by ``extract_feature`` and then
+    combined by ``bind_feature``, which indexes the rows by ``id_list``.
+    """
+    vector_dir = "embedding_model/ifeature_vectors/"
+    extract_feature(data, vector_dir)
+    return bind_feature(id_list, vector_dir)
+
+def GnerateFeatures(id_list,seq_dict,data):
+    """Build the fused feature table and return the selected feature columns.
+
+    The iFeature, BiLSTM, SSA and UniRep feature tables are computed,
+    concatenated column-wise (BiLSTM, UniRep, iFeature, SSA) and saved to
+    ``fusedFeature.csv``. The columns named in the first column of
+    ``./embedding_model/feature_id.csv`` are then returned.
+    """
+    ifeature = Gnerate_iFeatures(id_list, data)
+    feature_BiLSTM = BiLSTM_Embed(seq_dict)
+    feature_SSA = SSA_Embed(seq_dict)
+    feature_Unirep = UniRep_Embed(seq_dict)
+
+    feature_blocks = [feature_BiLSTM, feature_Unirep, ifeature, feature_SSA]
+    fusedFeature = pd.concat(feature_blocks, axis=1)
+    fusedFeature.to_csv("fusedFeature.csv")
+
+    # The first column of feature_id.csv lists the names of the features to keep.
+    selected_names = pd.read_csv("./embedding_model/feature_id.csv").iloc[:, 0]
+    LGB_ALL_K = fusedFeature[selected_names]
+    LGB_ALL_K.index = fusedFeature.index
+    print("Feature selection completed!!!!\n\n")
+    return LGB_ALL_K
--- a/embedding_model/ifeature.py
+++ b/embedding_model/ifeature.py
+import subprocess
+import datetime
+import os
+import pandas as pd
+import shutil
+
+def run_command(cmd):
+    """Echo ``cmd``, run it through the shell and report a non-zero exit status.
+
+    A failure is only printed (with a timestamp); nothing is raised and the
+    function always returns ``None``.
+    """
+    print(cmd)
+    status = subprocess.call(cmd, shell=True)
+    if status == 0:
+        return
+    timestamp = datetime.datetime.now()
+    print("ERROR: [{2}] Return code {0} when running the following command: {1}".format(status, cmd, timestamp))
+
+def write_file(filename):
+    """Create an empty directory ``filename`` below the current working directory.
+
+    An existing directory at that location is removed with all its contents
+    first, so the returned directory is always empty.
+
+    Args:
+        filename: directory path relative to the current working directory;
+            it may be nested (e.g. ``"a/b/"``).
+
+    Returns:
+        The path ``os.getcwd() + '/' + filename``.
+    """
+    file_path = os.getcwd() + '/' + filename
+    if os.path.exists(file_path):
+        shutil.rmtree(file_path)
+    # makedirs instead of mkdir: a nested path must also work when its parent
+    # directories do not exist yet.
+    os.makedirs(file_path)
+    return (file_path)
+
+def extract_feature(inputfile,outfile):
+    """Run ``iFeature.py`` once per descriptor type on ``inputfile``.
+
+    ``outfile`` is (re)created as an empty directory via ``write_file``; each
+    run writes to ``./<outfile>feature_<TYPE>``, so ``outfile`` is expected to
+    end with a path separator. Returns an empty tuple.
+    """
+    write_file(outfile)
+    descriptors = [
+        'AAC', 'APAAC', 'CKSAAGP', 'CKSAAP', 'CTDC', 'CTDD', 'CTDT', 'CTriad',
+        'DDE', 'DPC', 'GAAC', 'GDPC', 'Geary', 'GTPC', 'KSCTriad', 'Moran',
+        'NMBroto', 'PAAC', 'QSOrder', 'SOCNumber', 'TPC',
+    ]
+    for descriptor in descriptors:
+        command = "iFeature.py --file {0} --type {1} --out ./{2}feature_{1}".format(
+            inputfile, descriptor, outfile)
+        run_command(command)
+    return ()
+
+def bind_feature(id_list,outfile):
+    """Merge every feature table found in ``./<outfile>`` into one DataFrame.
+
+    All files of the directory are read as tab-separated tables and
+    inner-joined on their ``#`` column. The first column of the merged table is
+    then dropped, the rows are indexed by ``id_list`` and the result is saved to
+    ``./ifeature_merge.csv`` and returned.
+    """
+    file_path = "./" + outfile
+    table_paths = [file_path + '/' + name for name in os.listdir(file_path)]
+
+    merged = pd.read_table(table_paths[0], low_memory=False)
+    for table_path in table_paths[1:]:
+        next_table = pd.read_table(table_path, low_memory=False)
+        merged = pd.merge(merged, next_table, on='#', how='inner')
+
+    # Drop the first column and work on an independent copy.
+    ifeature = merged.iloc[:, 1:].copy()
+    ifeature.index = id_list
+    ifeature.to_csv("./ifeature_merge.csv", index=True, header=True)
+    return ifeature
\ No newline at end of file
--- a/embedding_model/ifeature_vectors/feature_AAC
+++ b/embedding_model/ifeature_vectors/feature_AAC
+#	A	C	D	E	F	G	H	I	K	L	M	N	P	Q	R	S	T	V	W	Y
+HUM3|1|1	0.12369791666666667	0.010416666666666666	0.048177083333333336	0.0390625	0.01953125	0.05078125	0.009114583333333334	0.0546875	0.057291666666666664	0.078125	0.00390625	0.09375	0.041666666666666664	0.08203125	0.033854166666666664	0.09244791666666667	0.0546875	0.10026041666666667	0.0	0.006510416666666667
+AvrPm1a|2|1	0.06451612903225806	0.01935483870967742	0.06451612903225806	0.06451612903225806	0.06451612903225806	0.03870967741935484	0.05161290322580645	0.05806451612903226	0.09032258064516129	0.0967741935483871	0.03225806451612903	0.03225806451612903	0.03870967741935484	0.0064516129032258064	0.03870967741935484	0.0967741935483871	0.04516129032258064	0.03225806451612903	0.0064516129032258064	0.05806451612903226
+AVRa10|3|1	0.05084745762711865	0.03389830508474576	0.11016949152542373	0.03389830508474576	0.06779661016949153	0.07627118644067797	0.03389830508474576	0.03389830508474576	0.03389830508474576	0.06779661016949153	0.00847457627118644	0.01694915254237288	0.025423728813559324	0.06779661016949153	0.05084745762711865	0.059322033898305086	0.06779661016949153	0.0847457627118644	0.03389830508474576	0.0423728813559322
+SIX10|4|1	0.06711409395973154	0.013422818791946308	0.0738255033557047	0.0	0.04697986577181208	0.10738255033557047	0.013422818791946308	0.06040268456375839	0.03355704697986577	0.04697986577181208	0.006711409395973154	0.026845637583892617	0.06040268456375839	0.013422818791946308	0.06040268456375839	0.11409395973154363	0.087248322147651	0.10067114093959731	0.006711409395973154	0.06040268456375839
+HvEC016|5|1	0.05333333333333334	0.035555555555555556	0.02666666666666667	0.013333333333333334	0.04888888888888889	0.11555555555555555	0.0044444444444444444	0.1511111111111111	0.03111111111111111	0.04888888888888889	0.017777777777777778	0.04888888888888889	0.008888888888888889	0.08	0.022222222222222223	0.09333333333333334	0.08444444444444445	0.08	0.0044444444444444444	0.03111111111111111
+Six13|6|1	0.05460750853242321	0.040955631399317405	0.05460750853242321	0.07849829351535836	0.023890784982935155	0.05802047781569966	0.027303754266211604	0.030716723549488054	0.06484641638225255	0.07849829351535836	0.017064846416382253	0.04436860068259386	0.04778156996587031	0.040955631399317405	0.04778156996587031	0.07508532423208192	0.07508532423208192	0.06484641638225255	0.034129692832764506	0.040955631399317405
+Cce1|7|1	0.08527131782945736	0.06976744186046512	0.06201550387596899	0.046511627906976744	0.023255813953488372	0.10077519379844961	0.031007751937984496	0.023255813953488372	0.06201550387596899	0.046511627906976744	0.015503875968992248	0.007751937984496124	0.05426356589147287	0.046511627906976744	0.03875968992248062	0.07751937984496124	0.06976744186046512	0.07751937984496124	0.03875968992248062	0.023255813953488372
+SIX9|8|1	0.12280701754385964	0.05263157894736842	0.06140350877192982	0.02631578947368421	0.043859649122807015	0.06140350877192982	0.008771929824561403	0.008771929824561403	0.07017543859649122	0.08771929824561403	0.008771929824561403	0.043859649122807015	0.03508771929824561	0.043859649122807015	0.07894736842105263	0.02631578947368421	0.07894736842105263	0.043859649122807015	0.03508771929824561	0.06140350877192982
+SIX12|9|1	0.08411214953271028	0.09345794392523364	0.018691588785046728	0.028037383177570093	0.009345794392523364	0.11214953271028037	0.028037383177570093	0.037383177570093455	0.056074766355140186	0.056074766355140186	0.009345794392523364	0.06542056074766354	0.037383177570093455	0.056074766355140186	0.009345794392523364	0.102803738317757	0.04672897196261682	0.09345794392523364	0.028037383177570093	0.028037383177570093
+Six14|10|1	0.03409090909090909	0.06818181818181818	0.011363636363636364	0.03409090909090909	0.045454545454545456	0.125	0.011363636363636364	0.056818181818181816	0.022727272727272728	0.07954545454545454	0.03409090909090909	0.07954545454545454	0.056818181818181816	0.022727272727272728	0.07954545454545454	0.125	0.056818181818181816	0.045454545454545456	0.0	0.011363636363636364
+AvrSr27|11|1	0.041666666666666664	0.10416666666666667	0.006944444444444444	0.034722222222222224	0.041666666666666664	0.04861111111111111	0.04861111111111111	0.041666666666666664	0.08333333333333333	0.05555555555555555	0.034722222222222224	0.05555555555555555	0.05555555555555555	0.027777777777777776	0.027777777777777776	0.11805555555555555	0.09027777777777778	0.041666666666666664	0.013888888888888888	0.027777777777777776
+SIX11|12|1	0.03636363636363636	0.07272727272727272	0.045454545454545456	0.03636363636363636	0.02727272727272727	0.08181818181818182	0.03636363636363636	0.1	0.045454545454545456	0.045454545454545456	0.01818181818181818	0.07272727272727272	0.045454545454545456	0.06363636363636363	0.045454545454545456	0.07272727272727272	0.05454545454545454	0.045454545454545456	0.03636363636363636	0.01818181818181818
+OSP24|13|1	0.10294117647058823	0.058823529411764705	0.051470588235294115	0.0661764705882353	0.022058823529411766	0.07352941176470588	0.007352941176470588	0.029411764705882353	0.04411764705882353	0.08823529411764706	0.007352941176470588	0.058823529411764705	0.0661764705882353	0.08088235294117647	0.058823529411764705	0.0661764705882353	0.029411764705882353	0.0661764705882353	0.0	0.022058823529411766
+AVRa9|14|1	0.058823529411764705	0.029411764705882353	0.0392156862745098	0.049019607843137254	0.0196078431372549	0.09803921568627451	0.0392156862745098	0.0784313725490196	0.0392156862745098	0.06862745098039216	0.00980392156862745	0.06862745098039216	0.049019607843137254	0.029411764705882353	0.029411764705882353	0.12745098039215685	0.029411764705882353	0.058823529411764705	0.0196078431372549	0.058823529411764705
+SCR108|15|1	0.12871287128712872	0.04455445544554455	0.0297029702970297	0.0297029702970297	0.054455445544554455	0.0594059405940594	0.01485148514851485	0.01485148514851485	0.09405940594059406	0.07920792079207921	0.01485148514851485	0.06930693069306931	0.0297029702970297	0.04950495049504951	0.0297029702970297	0.06435643564356436	0.07920792079207921	0.07920792079207921	0.019801980198019802	0.01485148514851485
+PITG22926|16|1	0.07575757575757576	0.005050505050505051	0.025252525252525252	0.050505050505050504	0.03535353535353535	0.06060606060606061	0.025252525252525252	0.020202020202020204	0.09595959595959595	0.12121212121212122	0.030303030303030304	0.03535353535353535	0.030303030303030304	0.050505050505050504	0.0707070707070707	0.08080808080808081	0.09595959595959595	0.045454545454545456	0.020202020202020204	0.025252525252525252
+AVRa7|17|1	0.09821428571428571	0.017857142857142856	0.05357142857142857	0.08035714285714286	0.05357142857142857	0.0625	0.026785714285714284	0.044642857142857144	0.026785714285714284	0.09821428571428571	0.026785714285714284	0.017857142857142856	0.07142857142857142	0.026785714285714284	0.07142857142857142	0.08928571428571429	0.044642857142857144	0.026785714285714284	0.008928571428571428	0.05357142857142857
--- a/embedding_model/ifeature_vectors/feature_APAAC
+++ b/embedding_model/ifeature_vectors/feature_APAAC
--- a/embedding_model/ifeature_vectors/feature_CKSAAGP
+++ b/embedding_model/ifeature_vectors/feature_CKSAAGP
--- a/embedding_model/ifeature_vectors/feature_CKSAAP
+++ b/embedding_model/ifeature_vectors/feature_CKSAAP
--- a/embedding_model/ifeature_vectors/feature_CTDC
+++ b/embedding_model/ifeature_vectors/feature_CTDC
--- a/embedding_model/ifeature_vectors/feature_CTDD
+++ b/embedding_model/ifeature_vectors/feature_CTDD
--- a/embedding_model/ifeature_vectors/feature_CTDT
+++ b/embedding_model/ifeature_vectors/feature_CTDT
--- a/embedding_model/ifeature_vectors/feature_CTriad
+++ b/embedding_model/ifeature_vectors/feature_CTriad
--- a/embedding_model/ifeature_vectors/feature_DDE
+++ b/embedding_model/ifeature_vectors/feature_DDE
--- a/embedding_model/ifeature_vectors/feature_DPC
+++ b/embedding_model/ifeature_vectors/feature_DPC
--- a/embedding_model/ifeature_vectors/feature_GAAC
+++ b/embedding_model/ifeature_vectors/feature_GAAC
+#	alphatic	aromatic	postivecharge	negativecharge	uncharge
+HUM3|1|1	0.4114583333333333	0.026041666666666668	0.10026041666666667	0.08723958333333333	0.375
+AvrPm1a|2|1	0.3225806451612903	0.12903225806451613	0.18064516129032257	0.12903225806451613	0.23870967741935484
+AVRa10|3|1	0.3220338983050847	0.1440677966101695	0.11864406779661017	0.1440677966101695	0.2711864406779661
+SIX10|4|1	0.38926174496644295	0.11409395973154363	0.10738255033557047	0.0738255033557047	0.31543624161073824
+HvEC016|5|1	0.4666666666666667	0.08444444444444445	0.057777777777777775	0.04	0.3511111111111111
+Six13|6|1	0.3037542662116041	0.09897610921501707	0.13993174061433447	0.13310580204778158	0.3242320819112628
+Cce1|7|1	0.3488372093023256	0.08527131782945736	0.13178294573643412	0.10852713178294573	0.32558139534883723
+SIX9|8|1	0.3333333333333333	0.14035087719298245	0.15789473684210525	0.08771929824561403	0.2807017543859649
+SIX12|9|1	0.3925233644859813	0.06542056074766354	0.09345794392523364	0.04672897196261682	0.40186915887850466
+Six14|10|1	0.375	0.056818181818181816	0.11363636363636363	0.045454545454545456	0.4090909090909091
+AvrSr27|11|1	0.2638888888888889	0.08333333333333333	0.1597222222222222	0.041666666666666664	0.4513888888888889
+SIX11|12|1	0.32727272727272727	0.08181818181818182	0.12727272727272726	0.08181818181818182	0.38181818181818183
+OSP24|13|1	0.36764705882352944	0.04411764705882353	0.11029411764705882	0.11764705882352941	0.3602941176470588
+AVRa9|14|1	0.37254901960784315	0.09803921568627451	0.10784313725490197	0.08823529411764706	0.3333333333333333
+SCR108|15|1	0.37623762376237624	0.0891089108910891	0.13861386138613863	0.0594059405940594	0.33663366336633666
+PITG22926|16|1	0.35353535353535354	0.08080808080808081	0.1919191919191919	0.07575757575757576	0.29797979797979796
+AVRa7|17|1	0.35714285714285715	0.11607142857142858	0.125	0.13392857142857142	0.26785714285714285
--- a/embedding_model/ifeature_vectors/feature_GDPC
+++ b/embedding_model/ifeature_vectors/feature_GDPC
+#	alphaticr.alphaticr	alphaticr.aromatic	alphaticr.postivecharger	alphaticr.negativecharger	alphaticr.uncharger	aromatic.alphaticr	aromatic.aromatic	aromatic.postivecharger	aromatic.negativecharger	aromatic.uncharger	postivecharger.alphaticr	postivecharger.aromatic	postivecharger.postivecharger	postivecharger.negativecharger	postivecharger.uncharger	negativecharger.alphaticr	negativecharger.aromatic	negativecharger.postivecharger	negativecharger.negativecharger	negativecharger.uncharger	uncharger.alphaticr	uncharger.aromatic	uncharger.postivecharger	uncharger.negativecharger	uncharger.uncharger
+HUM3|1|1	0.17209908735332463	0.005215123859191656	0.028683181225554105	0.041720990873533245	0.16427640156453716	0.00651890482398957	0.0	0.0	0.011734028683181226	0.007822685788787484	0.01955671447196871	0.002607561929595828	0.027379400260756193	0.00651890482398957	0.04432855280312908	0.028683181225554105	0.009126466753585397	0.01694915254237288	0.00651890482398957	0.02607561929595828	0.18383311603650587	0.009126466753585397	0.027379400260756193	0.020860495436766623	0.13298565840938723
+AvrPm1a|2|1	0.09090909090909091	0.05194805194805195	0.08441558441558442	0.025974025974025976	0.07142857142857142	0.045454545454545456	0.006493506493506494	0.025974025974025976	0.025974025974025976	0.025974025974025976	0.08441558441558442	0.01948051948051948	0.012987012987012988	0.012987012987012988	0.05194805194805195	0.05194805194805195	0.012987012987012988	0.012987012987012988	0.025974025974025976	0.025974025974025976	0.045454545454545456	0.03896103896103896	0.045454545454545456	0.03896103896103896	0.06493506493506493
+AVRa10|3|1	0.10256410256410256	0.042735042735042736	0.03418803418803419	0.05128205128205128	0.09401709401709402	0.02564102564102564	0.008547008547008548	0.017094017094017096	0.042735042735042736	0.05128205128205128	0.02564102564102564	0.042735042735042736	0.008547008547008548	0.008547008547008548	0.02564102564102564	0.06837606837606838	0.03418803418803419	0.0	0.008547008547008548	0.03418803418803419	0.09401709401709402	0.017094017094017096	0.05982905982905983	0.03418803418803419	0.06837606837606838
+SIX10|4|1	0.12162162162162163	0.06756756756756757	0.02027027027027027	0.02027027027027027	0.16216216216216217	0.04054054054054054	0.006756756756756757	0.013513513513513514	0.006756756756756757	0.0472972972972973	0.0472972972972973	0.006756756756756757	0.013513513513513514	0.013513513513513514	0.02027027027027027	0.033783783783783786	0.006756756756756757	0.006756756756756757	0.006756756756756757	0.02027027027027027	0.14189189189189189	0.02702702702702703	0.05405405405405406	0.02702702702702703	0.06756756756756757
+HvEC016|5|1	0.21428571428571427	0.022321428571428572	0.026785714285714284	0.022321428571428572	0.17857142857142858	0.03571428571428571	0.017857142857142856	0.0	0.004464285714285714	0.026785714285714284	0.026785714285714284	0.0	0.0	0.004464285714285714	0.026785714285714284	0.017857142857142856	0.004464285714285714	0.004464285714285714	0.0	0.013392857142857142	0.16964285714285715	0.04017857142857143	0.026785714285714284	0.008928571428571428	0.10714285714285714
+Six13|6|1	0.07534246575342465	0.023972602739726026	0.04452054794520548	0.03767123287671233	0.1232876712328767	0.03424657534246575	0.003424657534246575	0.0136986301369863	0.00684931506849315	0.0410958904109589	0.0410958904109589	0.0273972602739726	0.017123287671232876	0.00684931506849315	0.04794520547945205	0.0547945205479452	0.00684931506849315	0.02054794520547945	0.0273972602739726	0.023972602739726026	0.0958904109589041	0.03767123287671233	0.04452054794520548	0.0547945205479452	0.08904109589041095
+Cce1|7|1	0.1171875	0.0234375	0.0546875	0.0390625	0.1171875	0.0390625	0.0078125	0.0078125	0.0	0.03125	0.0546875	0.0	0.03125	0.0078125	0.03125	0.015625	0.0390625	0.015625	0.0078125	0.03125	0.1171875	0.015625	0.0234375	0.0546875	0.1171875
+SIX9|8|1	0.1504424778761062	0.02654867256637168	0.04424778761061947	0.02654867256637168	0.08849557522123894	0.02654867256637168	0.017699115044247787	0.035398230088495575	0.017699115044247787	0.04424778761061947	0.04424778761061947	0.035398230088495575	0.017699115044247787	0.008849557522123894	0.04424778761061947	0.04424778761061947	0.008849557522123894	0.008849557522123894	0.0	0.02654867256637168	0.061946902654867256	0.05309734513274336	0.05309734513274336	0.035398230088495575	0.07964601769911504
+SIX12|9|1	0.1509433962264151	0.03773584905660377	0.02830188679245283	0.009433962264150943	0.16981132075471697	0.018867924528301886	0.009433962264150943	0.009433962264150943	0.009433962264150943	0.018867924528301886	0.02830188679245283	0.0	0.009433962264150943	0.0	0.05660377358490566	0.018867924528301886	0.009433962264150943	0.0	0.0	0.018867924528301886	0.1792452830188679	0.009433962264150943	0.04716981132075472	0.02830188679245283	0.1320754716981132
+Six14|10|1	0.16091954022988506	0.022988505747126436	0.011494252873563218	0.0	0.1839080459770115	0.04597701149425287	0.0	0.011494252873563218	0.0	0.0	0.034482758620689655	0.011494252873563218	0.0	0.022988505747126436	0.04597701149425287	0.022988505747126436	0.011494252873563218	0.0	0.0	0.011494252873563218	0.10344827586206896	0.011494252873563218	0.09195402298850575	0.022988505747126436	0.1724137931034483
+AvrSr27|11|1	0.13986013986013987	0.013986013986013986	0.02097902097902098	0.0	0.09090909090909091	0.02097902097902098	0.0	0.006993006993006993	0.013986013986013986	0.03496503496503497	0.02097902097902098	0.03496503496503497	0.04895104895104895	0.0	0.055944055944055944	0.0	0.0	0.006993006993006993	0.006993006993006993	0.027972027972027972	0.07692307692307693	0.03496503496503497	0.07692307692307693	0.02097902097902098	0.24475524475524477
+SIX11|12|1	0.13761467889908258	0.01834862385321101	0.027522935779816515	0.03669724770642202	0.10091743119266056	0.009174311926605505	0.009174311926605505	0.0	0.009174311926605505	0.05504587155963303	0.03669724770642202	0.009174311926605505	0.027522935779816515	0.01834862385321101	0.03669724770642202	0.01834862385321101	0.01834862385321101	0.009174311926605505	0.0	0.03669724770642202	0.11926605504587157	0.027522935779816515	0.06422018348623854	0.01834862385321101	0.1559633027522936
+OSP24|13|1	0.16296296296296298	0.014814814814814815	0.037037037037037035	0.02962962962962963	0.11851851851851852	0.022222222222222223	0.0	0.014814814814814815	0.007407407407407408	0.0	0.037037037037037035	0.007407407407407408	0.014814814814814815	0.0	0.05185185185185185	0.044444444444444446	0.0	0.0	0.05185185185185185	0.022222222222222223	0.0962962962962963	0.022222222222222223	0.044444444444444446	0.02962962962962963	0.17037037037037037
+AVRa9|14|1	0.10891089108910891	0.0297029702970297	0.0594059405940594	0.039603960396039604	0.13861386138613863	0.04950495049504951	0.009900990099009901	0.0	0.009900990099009901	0.0297029702970297	0.0297029702970297	0.009900990099009901	0.009900990099009901	0.009900990099009901	0.04950495049504951	0.039603960396039604	0.009900990099009901	0.0	0.0	0.039603960396039604	0.13861386138613863	0.039603960396039604	0.039603960396039604	0.0297029702970297	0.07920792079207921
+SCR108|15|1	0.14925373134328357	0.024875621890547265	0.06965174129353234	0.009950248756218905	0.12437810945273632	0.024875621890547265	0.004975124378109453	0.009950248756218905	0.0	0.04477611940298507	0.04975124378109453	0.009950248756218905	0.029850746268656716	0.009950248756218905	0.03980099502487562	0.029850746268656716	0.014925373134328358	0.004975124378109453	0.0	0.009950248756218905	0.11940298507462686	0.03482587064676617	0.024875621890547265	0.03980099502487562	0.11940298507462686
+PITG22926|16|1	0.12690355329949238	0.025380710659898477	0.06598984771573604	0.02030456852791878	0.116751269035533	0.04568527918781726	0.0	0.015228426395939087	0.005076142131979695	0.015228426395939087	0.06091370558375635	0.015228426395939087	0.03553299492385787	0.025380710659898477	0.05583756345177665	0.025380710659898477	0.0	0.015228426395939087	0.01015228426395939	0.025380710659898477	0.09137055837563451	0.04060913705583756	0.06091370558375635	0.015228426395939087	0.08629441624365482
+AVRa7|17|1	0.11711711711711711	0.036036036036036036	0.04504504504504504	0.05405405405405406	0.10810810810810811	0.018018018018018018	0.018018018018018018	0.018018018018018018	0.02702702702702703	0.036036036036036036	0.036036036036036036	0.018018018018018018	0.036036036036036036	0.018018018018018018	0.018018018018018018	0.04504504504504504	0.018018018018018018	0.009009009009009009	0.009009009009009009	0.05405405405405406	0.13513513513513514	0.02702702702702703	0.018018018018018018	0.02702702702702703	0.05405405405405406
--- a/embedding_model/ifeature_vectors/feature_GTPC
+++ b/embedding_model/ifeature_vectors/feature_GTPC
--- a/embedding_model/ifeature_vectors/feature_Geary
+++ b/embedding_model/ifeature_vectors/feature_Geary
--- a/embedding_model/ifeature_vectors/feature_KSCTriad
+++ b/embedding_model/ifeature_vectors/feature_KSCTriad
--- a/embedding_model/ifeature_vectors/feature_Moran
+++ b/embedding_model/ifeature_vectors/feature_Moran
--- a/embedding_model/ifeature_vectors/feature_NMBroto
+++ b/embedding_model/ifeature_vectors/feature_NMBroto
--- a/embedding_model/ifeature_vectors/feature_PAAC
+++ b/embedding_model/ifeature_vectors/feature_PAAC
--- a/embedding_model/ifeature_vectors/feature_QSOrder
+++ b/embedding_model/ifeature_vectors/feature_QSOrder
--- a/embedding_model/ifeature_vectors/feature_SOCNumber
+++ b/embedding_model/ifeature_vectors/feature_SOCNumber
--- a/embedding_model/ifeature_vectors/feature_TPC
+++ b/embedding_model/ifeature_vectors/feature_TPC
--- a/embedding_model/preprocessing/__init__.py
+++ b/embedding_model/preprocessing/__init__.py
--- a/embedding_model/preprocessing/__pycache__/__init__.cpython-37.pyc
+++ b/embedding_model/preprocessing/__pycache__/__init__.cpython-37.pyc
--- a/embedding_model/preprocessing/__pycache__/__init__.cpython-38.pyc
+++ b/embedding_model/preprocessing/__pycache__/__init__.cpython-38.pyc
--- a/embedding_model/preprocessing/__pycache__/alphabets.cpython-37.pyc
+++ b/embedding_model/preprocessing/__pycache__/alphabets.cpython-37.pyc
--- a/embedding_model/preprocessing/__pycache__/alphabets.cpython-38.pyc
+++ b/embedding_model/preprocessing/__pycache__/alphabets.cpython-38.pyc
--- a/embedding_model/preprocessing/alphabets.py
+++ b/embedding_model/preprocessing/alphabets.py
--- a/embedding_model/src/.idea/.gitignore
+++ b/embedding_model/src/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/embedding_model/src/.idea/inspectionProfiles/profiles_settings.xml
+++ b/embedding_model/src/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/embedding_model/src/.idea/misc.xml
+++ b/embedding_model/src/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/embedding_model/src/.idea/modules.xml
+++ b/embedding_model/src/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/src.iml" filepath="$PROJECT_DIR$/.idea/src.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/embedding_model/src/.idea/src.iml
+++ b/embedding_model/src/.idea/src.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
--- a/embedding_model/src/SSA_embedding.py
+++ b/embedding_model/src/SSA_embedding.py
--- a/embedding_model/src/UniRep_emb.py
+++ b/embedding_model/src/UniRep_emb.py
--- a/embedding_model/src/__init__.py
+++ b/embedding_model/src/__init__.py
--- a/embedding_model/src/__pycache__/SSA_embedding.cpython-37.pyc
+++ b/embedding_model/src/__pycache__/SSA_embedding.cpython-37.pyc
--- a/embedding_model/src/__pycache__/SSA_embedding.cpython-38.pyc
+++ b/embedding_model/src/__pycache__/SSA_embedding.cpython-38.pyc
--- a/embedding_model/src/__pycache__/TAPE_emb.cpython-37.pyc
+++ b/embedding_model/src/__pycache__/TAPE_emb.cpython-37.pyc
--- a/embedding_model/src/__pycache__/UniRep_emb.cpython-37.pyc
+++ b/embedding_model/src/__pycache__/UniRep_emb.cpython-37.pyc
--- a/embedding_model/src/__pycache__/UniRep_emb.cpython-38.pyc
+++ b/embedding_model/src/__pycache__/UniRep_emb.cpython-38.pyc
--- a/embedding_model/src/__pycache__/W2V_emb.cpython-37.pyc
+++ b/embedding_model/src/__pycache__/W2V_emb.cpython-37.pyc
--- a/embedding_model/src/__pycache__/__init__.cpython-37.pyc
+++ b/embedding_model/src/__pycache__/__init__.cpython-37.pyc
--- a/embedding_model/src/__pycache__/__init__.cpython-38.pyc
+++ b/embedding_model/src/__pycache__/__init__.cpython-38.pyc
--- a/embedding_model/src/models/__init__.py
+++ b/embedding_model/src/models/__init__.py
--- a/embedding_model/src/models/__pycache__/__init__.cpython-37.pyc
+++ b/embedding_model/src/models/__pycache__/__init__.cpython-37.pyc
--- a/embedding_model/src/models/__pycache__/__init__.cpython-38.pyc
+++ b/embedding_model/src/models/__pycache__/__init__.cpython-38.pyc
--- a/embedding_model/src/models/__pycache__/comparison.cpython-37.pyc
+++ b/embedding_model/src/models/__pycache__/comparison.cpython-37.pyc
--- a/embedding_model/src/models/__pycache__/comparison.cpython-38.pyc
+++ b/embedding_model/src/models/__pycache__/comparison.cpython-38.pyc
--- a/embedding_model/src/models/__pycache__/embedding.cpython-37.pyc
+++ b/embedding_model/src/models/__pycache__/embedding.cpython-37.pyc
--- a/embedding_model/src/models/__pycache__/embedding.cpython-38.pyc
+++ b/embedding_model/src/models/__pycache__/embedding.cpython-38.pyc
--- a/embedding_model/src/models/__pycache__/multitask.cpython-37.pyc
+++ b/embedding_model/src/models/__pycache__/multitask.cpython-37.pyc
--- a/embedding_model/src/models/__pycache__/multitask.cpython-38.pyc
+++ b/embedding_model/src/models/__pycache__/multitask.cpython-38.pyc
--- a/embedding_model/src/models/__pycache__/sequence.cpython-37.pyc
+++ b/embedding_model/src/models/__pycache__/sequence.cpython-37.pyc
--- a/embedding_model/src/models/__pycache__/sequence.cpython-38.pyc
+++ b/embedding_model/src/models/__pycache__/sequence.cpython-38.pyc
--- a/embedding_model/src/models/comparison.py
+++ b/embedding_model/src/models/comparison.py
--- a/embedding_model/src/models/embedding.py
+++ b/embedding_model/src/models/embedding.py
--- a/embedding_model/src/models/multitask.py
+++ b/embedding_model/src/models/multitask.py
--- a/embedding_model/src/models/sequence.py
+++ b/embedding_model/src/models/sequence.py
--- a/ifeature_merge.csv
+++ b/ifeature_merge.csv
--- a/independent_test_pos.fasta
+++ b/independent_test_pos.fasta
--- a/mlp_model/mlp_gan_selection1752.pkl
+++ b/mlp_model/mlp_gan_selection1752.pkl
--- a/requirements.txt
+++ b/requirements.txt
--- a/test_results.csv
+++ b/test_results.csv