郑毅 1 month ago
commit b0a7d92332
4 changed files with 32 additions and 20 deletions
  1. .gitignore (+1 -0)
  2. __main__.py (+1 -1)
  3. ddd.csv (+0 -0)
  4. torch_test/logistic_regression.py (+30 -19)

+ 1 - 0
.gitignore

@@ -5,3 +5,4 @@ bin
 output
 vendor
 .DS_Store
+data

+ 1 - 1
__main__.py

@@ -1,7 +1,7 @@
 from torch_test.logistic_regression import *
 
 def main():
-    ordinal_regression()
+    test()
 
 # Program entry point
 if __name__ == '__main__':

File diff suppressed because it is too large
+ 0 - 0
ddd.csv


+ 30 - 19
torch_test/logistic_regression.py

@@ -5,7 +5,6 @@ import torchvision.transforms as transforms
 import torchvision.datasets as dsets
 import numpy as np
 from skorch import NeuralNet
-from skorch.callbacks import EpochScoring
 from matplotlib import pyplot as plt
 
 from spacecutter.callbacks import AscensionCallback
@@ -21,6 +20,7 @@ class LogisticRegression(nn.Module):
         outputs = self.linear(x)
         return outputs
 
+# PyTorch logistic regression on the MNIST data set
 def regression_on_mnist():
     batch_size = 100
     n_iters = 5000
@@ -66,7 +66,9 @@ def regression_on_mnist():
                 accuracy = 100 * correct/total
                 print("Iteration: {}. Loss: {}. Accuracy: {}.".format(iter, loss.item(), accuracy))
 
+# PyTorch + skorch ordinal logistic regression
 def ordinal_regression():
+    # 0. Prepare the data
     X = np.array([
         [0.5, 0.1, -0.1],
         [1.0, 0.2, 0.6],
@@ -78,42 +80,51 @@ def ordinal_regression():
     num_features = X.shape[1]
     num_classes = len(np.unique(y))
 
-    predictor = nn.Sequential(
+    # 1. Ordinal logistic regression
+    predictor = nn.Sequential(  # predictor network
         nn.Linear(num_features, num_features),
         nn.ReLU(),
         nn.Linear(num_features, 1)
     )
 
-    model = OrdinalLogisticModel(predictor, num_classes)
-
-    y_pred = model(torch.as_tensor(X))
-
-    print(y_pred)
-
+    # model = OrdinalLogisticModel(predictor, num_classes)
+    # y_pred = model(torch.as_tensor(X))
+    # print(y_pred)
     # tensor([[0.2325, 0.2191, 0.5485],
     #         [0.2324, 0.2191, 0.5485],
     #         [0.2607, 0.2287, 0.5106]], grad_fn=<CatBackward>)
 
+    # 2. Train the model with skorch
     skorch_model = NeuralNet(
         module=OrdinalLogisticModel,
         module__predictor=predictor,
         module__num_classes=num_classes,
-        criterion=CumulativeLinkLoss,
+        optimizer=torch.optim.Adam,  # Adam converges quickly (a non-SGD optimizer)
+        criterion=CumulativeLinkLoss,  # cumulative link loss matching OrdinalLogisticModel; the commonly used cross-entropy torch.nn.CrossEntropyLoss is not applicable here
         train_split=None,
-        max_epochs= 30,
+        max_epochs=5000,  # number of training epochs (epoch = one pass over all samples; iteration = one step on a batch of batch_size samples; only SGD-style mini-batch optimization counts iterations)
         callbacks=[
             ('ascension', AscensionCallback())
         ],
     )
 
-    skorch_model.fit(X, y)
+    skorch_model.fit(X, y)  # train
+    # y_proba = skorch_model.predict_proba(X)  # predict
+
+    # 3. Visualize the training loss with Matplotlib
+    train_loss = skorch_model.history[:, 'train_loss']
+
+    plt.plot(train_loss, label='Train Loss')
+    plt.xlabel('Epoch')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.show()
 
-    # train_acc = model.history[:, 'train_accuracy']
-    # valid_acc = model.history[:, 'valid_accuracy']
+import pandas as pd
 
-    # plt.plot(train_acc, label='Train Accuracy')
-    # plt.plot(valid_acc, label='Validation Accuracy')
-    # plt.xlabel('Epoch')
-    # plt.ylabel('Accuracy')
-    # plt.legend()
-    # plt.show()
+def test():
+    data = pd.read_stata('./data/CGSS/CGSS2021_20240607.dta', convert_categoricals=False)
+    # with pd.io.stata.StataReader('/Users/marion/Downloads/CGSS2021_20240607.dta') as reader:
+    #     vl = reader.value_labels()
+    #     print(vl)
+    print(data.head())  # print so the preview is visible when run as a script
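
For context, here is a minimal, self-contained sketch of the ordinal-regression setup that torch_test/logistic_regression.py wires together above. The target vector y, the third data row, and the spacecutter import paths are not visible in this diff, so they are assumptions made for illustration rather than the repository's exact code.

# Sketch only: toy ordinal regression with spacecutter + skorch, mirroring the diff above.
# The values of y, the third X row, and the import paths are assumed, not taken from the commit.
import numpy as np
import torch
from torch import nn
from skorch import NeuralNet
from spacecutter.models import OrdinalLogisticModel
from spacecutter.losses import CumulativeLinkLoss
from spacecutter.callbacks import AscensionCallback

X = np.array([
    [0.5, 0.1, -0.1],
    [1.0, 0.2, 0.6],
    [-2.0, 0.4, 0.8],   # extra illustrative row, not shown in the diff
], dtype=np.float32)
y = np.array([0, 1, 2]).reshape(-1, 1).astype(np.int64)  # ordinal labels, shape (n_samples, 1)

num_features = X.shape[1]
num_classes = len(np.unique(y))

predictor = nn.Sequential(            # maps features to a single latent score
    nn.Linear(num_features, num_features),
    nn.ReLU(),
    nn.Linear(num_features, 1),
)

skorch_model = NeuralNet(
    module=OrdinalLogisticModel,
    module__predictor=predictor,
    module__num_classes=num_classes,
    optimizer=torch.optim.Adam,
    criterion=CumulativeLinkLoss,     # ordinal (cumulative link) loss, not cross-entropy
    train_split=None,
    max_epochs=50,                    # kept small for a quick illustration
    callbacks=[('ascension', AscensionCallback())],  # keeps the learned cutpoints ascending
)

skorch_model.fit(X, y)
y_proba = skorch_model.predict_proba(X)  # one probability per ordinal class and sample
y_pred = y_proba.argmax(axis=1)          # predicted ordinal level per sample
print(y_proba)
print(y_pred)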

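The new test() function reads the CGSS 2021 survey file with pandas. Below is a hedged sketch of how that file could be inspected a little further, including the value labels hinted at by the commented-out StataReader block; the shape/label printing and the variable_labels() call are illustrative additions, not part of this commit.

# Sketch only: inspecting a Stata .dta file with pandas, using the path from the diff above.
import pandas as pd

PATH = './data/CGSS/CGSS2021_20240607.dta'

# Raw numeric answer codes, exactly as test() loads them.
data = pd.read_stata(PATH, convert_categoricals=False)
print(data.shape)   # (respondents, variables)
print(data.head())  # first few records

# The .dta file also carries label sets that map the numeric codes back to answer text.
with pd.io.stata.StataReader(PATH) as reader:
    reader.read()                               # load the file so the label tables are populated
    value_labels = reader.value_labels()        # {label-set name: {code: label}}
    variable_labels = reader.variable_labels()  # {column name: question description}
print(len(value_labels), len(variable_labels))
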
Some files were not shown because too many files changed in this diff