|
@@ -5,7 +5,6 @@ import torchvision.transforms as transforms
|
|
|
import torchvision.datasets as dsets
|
|
|
import numpy as np
|
|
|
from skorch import NeuralNet
|
|
|
-from skorch.callbacks import EpochScoring
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
|
from spacecutter.callbacks import AscensionCallback
|
|
@@ -21,6 +20,7 @@ class LogisticRegression(nn.Module):
|
|
|
outputs = self.linear(x)
|
|
|
return outputs
|
|
|
|
|
|
+# PyTorch 逻辑回归 MNIST 数据
|
|
|
def regression_on_mnist():
|
|
|
batch_size = 100
|
|
|
n_iters = 5000
|
|
@@ -66,7 +66,9 @@ def regression_on_mnist():
|
|
|
accuracy = 100 * correct/total
|
|
|
print("Iteration: {}. Loss: {}. Accuracy: {}.".format(iter, loss.item(), accuracy))
|
|
|
|
|
|
+# PyTorch + Skorch 有序逻辑回归
|
|
|
def ordinal_regression():
|
|
|
+ # 0. 数据准备
|
|
|
X = np.array([
|
|
|
[0.5, 0.1, -0.1],
|
|
|
[1.0, 0.2, 0.6],
|
|
@@ -78,42 +80,51 @@ def ordinal_regression():
|
|
|
num_features = X.shape[1]
|
|
|
num_classes = len(np.unique(y))
|
|
|
|
|
|
- predictor = nn.Sequential(
|
|
|
+ # 1. 有序逻辑回归
|
|
|
+ predictor = nn.Sequential( # 预测器
|
|
|
nn.Linear(num_features, num_features),
|
|
|
nn.ReLU(),
|
|
|
nn.Linear(num_features, 1)
|
|
|
)
|
|
|
|
|
|
- model = OrdinalLogisticModel(predictor, num_classes)
|
|
|
-
|
|
|
- y_pred = model(torch.as_tensor(X))
|
|
|
-
|
|
|
- print(y_pred)
|
|
|
-
|
|
|
+ # model = OrdinalLogisticModel(predictor, num_classes)
|
|
|
+ # y_pred = model(torch.as_tensor(X))
|
|
|
+ # print(y_pred)
|
|
|
# tensor([[0.2325, 0.2191, 0.5485],
|
|
|
# [0.2324, 0.2191, 0.5485],
|
|
|
# [0.2607, 0.2287, 0.5106]], grad_fn=<CatBackward>)
|
|
|
|
|
|
+ # 2. Skorch 训练模型
|
|
|
skorch_model = NeuralNet(
|
|
|
module=OrdinalLogisticModel,
|
|
|
module__predictor=predictor,
|
|
|
module__num_classes=num_classes,
|
|
|
- criterion=CumulativeLinkLoss,
|
|
|
+ optimizer=torch.optim.Adam, # Adam收敛速度快,非SGD优化算法
|
|
|
+ criterion=CumulativeLinkLoss, # 与OrdinalLogisticModel匹配的累计链接损失函数,常用交叉熵 torch.nn.CrossEntropyLoss 此处不适用
|
|
|
train_split=None,
|
|
|
- max_epochs= 30,
|
|
|
+ max_epochs= 5000, # 训练次数(epoch=全部样本训练一次,iteration=取batchsize样本训练一次,SGD随机梯度下降优化算法才分iteration)
|
|
|
callbacks=[
|
|
|
('ascension', AscensionCallback())
|
|
|
],
|
|
|
)
|
|
|
|
|
|
- skorch_model.fit(X, y)
|
|
|
+ skorch_model.fit(X, y) # 训练
|
|
|
+ # y_proba = skorch_model.predict_proba(X) # 预测
|
|
|
+
|
|
|
+ # 3. Matplotlib 可视化
|
|
|
+ train_loss = skorch_model.history[:, 'train_loss']
|
|
|
+
|
|
|
+ plt.plot(train_loss, label='Train Loss')
|
|
|
+ plt.xlabel('Epoch')
|
|
|
+ plt.ylabel('Loss')
|
|
|
+ plt.legend()
|
|
|
+ plt.show()
|
|
|
|
|
|
- # train_acc = model.history[:, 'train_accuracy']
|
|
|
- # valid_acc = model.history[:, 'valid_accuracy']
|
|
|
+import pandas as pd
|
|
|
|
|
|
- # plt.plot(train_acc, label='Train Accuracy')
|
|
|
- # plt.plot(valid_acc, label='Validation Accuracy')
|
|
|
- # plt.xlabel('Epoch')
|
|
|
- # plt.ylabel('Accuracy')
|
|
|
- # plt.legend()
|
|
|
- # plt.show()
|
|
|
+def test():
|
|
|
+ data = pd.read_stata('./data/CGSS/CGSS2021_20240607.dta', convert_categoricals=False)
|
|
|
+ # with pd.io.stata.StataReader('/Users/marion/Downloads/CGSS2021_20240607.dta') as reader:
|
|
|
+ # vl = reader.value_labels()
|
|
|
+ # print(vl)
|
|
|
+ data.head()
|