当前位置:Gxlcms > Python > 详解用TensorFlow实现逻辑回归算法

详解用TensorFlow实现逻辑回归算法

时间:2021-07-01 10:21:17 帮助过:49人阅读

这篇文章主要介绍了如何用TensorFlow实现逻辑回归算法,具有一定的参考价值,现在分享给大家,有需要的朋友可以参考一下。

本文将实现逻辑回归算法,预测低出生体重的概率。

  1. # Logistic Regression
  2. # 逻辑回归
  3. #----------------------------------
  4. #
  5. # This script shows how to use TensorFlow to
  6. # solve logistic regression.
  7. # y = sigmoid(Ax + b)
  8. #
  9. # We will use the low birth weight data, specifically:
  10. # y = 0 or 1 = low birth weight
  11. # x = demographic and medical history data
  12. import matplotlib.pyplot as plt
  13. import numpy as np
  14. import tensorflow as tf
  15. import requests
  16. from tensorflow.python.framework import ops
  17. import os.path
  18. import csv
  # Reset the default graph before any model ops are created.
  ops.reset_default_graph()

  # Create the session used by every later sess.run(...) call in this script.
  sess = tf.Session()
  ###
  # Obtain and prepare data for modeling
  ###
  # Name of the local CSV copy of the data set.
  birth_weight_file = 'birth_weight.csv'

  # Download the tab-separated data and cache it as CSV, but only when the
  # cache file does not already exist in the current directory.
  if not os.path.exists(birth_weight_file):
      birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'
      birth_file = requests.get(birthdata_url)
      # Fail loudly on an HTTP error instead of caching an error page as data.
      birth_file.raise_for_status()
      # splitlines() handles both '\n' and '\r\n' endings; splitting on '\r\n'
      # alone leaves the whole payload as a single "line" when the server
      # sends bare '\n'.
      raw_lines = birth_file.text.splitlines()
      birth_header = raw_lines[0].split('\t')
      birth_data = [
          [float(field) for field in line.split('\t') if len(field) >= 1]
          for line in raw_lines[1:]
          if len(line) >= 1
      ]
      # The csv module requires newline='' on the file object; without it an
      # extra blank row is written after every record on Windows. The `with`
      # statement closes the file, so no explicit close() is needed.
      with open(birth_weight_file, "w", newline='') as f:
          writer = csv.writer(f)
          writer.writerow(birth_header)
          writer.writerows(birth_data)

  # Read the cached CSV into memory: the first row is the header, every
  # following row becomes a list of floats. Empty rows (e.g. left behind by a
  # cache file written without newline='') are skipped.
  with open(birth_weight_file, newline='') as csvfile:
      csv_reader = csv.reader(csvfile)
      birth_header = next(csv_reader)
      birth_data = [[float(x) for x in row] for row in csv_reader if row]
  # Target variable: the first field of every record.
  y_vals = np.array([record[0] for record in birth_data])
  # Predictor variables: fields 1 through 7 of every record
  # (not id, not target, and not birthweight).
  x_vals = np.array([record[1:8] for record in birth_data])
  # Seed both NumPy and TensorFlow for reproducible results.
  seed = 99
  np.random.seed(seed)
  tf.set_random_seed(seed)

  # Split data into train/test = 80%/20%: the training rows are drawn at
  # random without replacement, and every row that was not drawn becomes a
  # test row.
  num_rows = len(x_vals)
  train_indices = np.random.choice(num_rows, round(num_rows * 0.8), replace=False)
  held_out_rows = set(range(num_rows)) - set(train_indices)
  test_indices = np.array(list(held_out_rows))

  x_vals_train, x_vals_test = x_vals[train_indices], x_vals[test_indices]
  y_vals_train, y_vals_test = y_vals[train_indices], y_vals[test_indices]
  # Normalize by column (min-max norm).
  # Scaling every feature into the [0, 1] range helps logistic regression
  # converge.
  def normalize_cols(m, col_max=None, col_min=None):
      """Min-max scale each column of ``m``.

      Args:
          m: NumPy array whose columns are scaled; minima and maxima are
              taken along axis 0.
          col_max: Optional per-column maxima to scale with. Defaults to the
              column maxima of ``m`` itself.
          col_min: Optional per-column minima to scale with. Defaults to the
              column minima of ``m`` itself.

      Returns:
          ``(m - col_min) / (col_max - col_min)`` computed column-wise. With
          the default statistics every non-constant column spans [0, 1].
          Passing statistics from another matrix (e.g. the training split)
          puts ``m`` on that matrix's scale, so values may fall outside
          [0, 1].
      """
      if col_max is None:
          col_max = m.max(axis=0)
      if col_min is None:
          col_min = m.min(axis=0)
      col_max = np.asarray(col_max, dtype=float)
      col_min = np.asarray(col_min, dtype=float)
      span = col_max - col_min
      # A constant column has zero span; dividing by 1 instead yields 0 for
      # that column (with its own statistics) rather than a 0/0 NaN and a
      # RuntimeWarning.
      span = np.where(span == 0, 1.0, span)
      return (m - col_min) / span
  # Scale the test split with the TRAINING split's column range. Scaling the
  # test split by its own min/max would put train and test features on
  # different scales, so the learned weights would not apply to the test
  # inputs. The statistics are captured before x_vals_train is overwritten.
  train_col_min = x_vals_train.min(axis=0)
  train_col_max = x_vals_train.max(axis=0)
  # A column that is constant in the training split has zero range; divide
  # by 1 there instead of producing 0/0 or x/0.
  train_col_span = np.where(train_col_max > train_col_min,
                            train_col_max - train_col_min, 1.0)
  x_vals_train = np.nan_to_num(normalize_cols(x_vals_train))
  x_vals_test = np.nan_to_num((x_vals_test - train_col_min) / train_col_span)
  ###
  # Define TensorFlow computational graph
  ###
  # Number of rows drawn for each training step.
  batch_size = 25

  # Placeholders fed at run time: a 7-column feature matrix and a one-column
  # label matrix. The row count is left open (None).
  num_features = 7
  x_data = tf.placeholder(dtype=tf.float32, shape=[None, num_features])
  y_target = tf.placeholder(dtype=tf.float32, shape=[None, 1])

  # Trainable parameters of the logistic regression, initialized from a
  # random normal: weight column A and bias b.
  A = tf.Variable(tf.random_normal(shape=[num_features, 1]))
  b = tf.Variable(tf.random_normal(shape=[1, 1]))

  # Model output is the linear term x_data * A + b (the logits); the sigmoid
  # is applied inside the loss op below.
  model_output = tf.add(tf.matmul(x_data, A), b)

  # Loss: sigmoid cross entropy per example, averaged over the fed rows.
  per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=model_output, labels=y_target)
  loss = tf.reduce_mean(per_example_loss)

  # Optimizer: gradient descent with a learning rate of 0.01.
  learning_rate = 0.01
  my_opt = tf.train.GradientDescentOptimizer(learning_rate)
  train_step = my_opt.minimize(loss)
  ###
  # Train model
  ###
  # Create and run the op that initializes all variables.
  init = tf.global_variables_initializer()
  sess.run(init)

  # Besides the loss, the classifier's accuracy on the training and test sets
  # is recorded as well, so build ops that turn the model output into a 0/1
  # prediction and measure the fraction of predictions equal to the target.
  prediction = tf.round(tf.sigmoid(model_output))
  is_correct = tf.equal(prediction, y_target)
  predictions_correct = tf.cast(is_correct, tf.float32)
  accuracy = tf.reduce_mean(predictions_correct)
  # Training loop: run 1500 training steps, recording the batch loss and the
  # train/test accuracy after every step.
  loss_vec = []
  train_acc = []
  test_acc = []

  # The full-set feeds are the same on every iteration, so build them once.
  train_feed = {x_data: x_vals_train, y_target: np.transpose([y_vals_train])}
  test_feed = {x_data: x_vals_test, y_target: np.transpose([y_vals_test])}

  for i in range(1500):
      # Draw batch_size training rows at random (np.random.choice samples
      # with replacement by default).
      rand_index = np.random.choice(len(x_vals_train), size=batch_size)
      batch_feed = {
          x_data: x_vals_train[rand_index],
          # Labels are fed as a single column to match y_target.
          y_target: np.transpose([y_vals_train[rand_index]]),
      }
      sess.run(train_step, feed_dict=batch_feed)

      # The loss is evaluated in its own run, after the update step, on the
      # same batch.
      temp_loss = sess.run(loss, feed_dict=batch_feed)
      loss_vec.append(temp_loss)

      train_acc.append(sess.run(accuracy, feed_dict=train_feed))
      test_acc.append(sess.run(accuracy, feed_dict=test_feed))

      # Report progress every 300 steps.
      if (i + 1) % 300 == 0:
          print('Loss = ' + str(temp_loss))
  ###
  # Display model performance
  ###
  # First figure: cross-entropy loss recorded at every training step.
  plt.plot(loss_vec, 'k-')
  plt.title('Cross Entropy Loss per Generation')
  plt.xlabel('Generation')
  plt.ylabel('Cross Entropy Loss')
  plt.show()

  # Second figure: train and test accuracy curves on the same axes.
  accuracy_curves = (
      (train_acc, 'k-', 'Train Set Accuracy'),
      (test_acc, 'r--', 'Test Set Accuracy'),
  )
  for curve, line_format, curve_label in accuracy_curves:
      plt.plot(curve, line_format, label=curve_label)
  plt.title('Train and Test Accuracy')
  plt.xlabel('Generation')
  plt.ylabel('Accuracy')
  plt.legend(loc='lower right')
  plt.show()

数据结果:

Loss = 0.845124
Loss = 0.658061
Loss = 0.471852
Loss = 0.643469
Loss = 0.672077

迭代1500次的交叉熵损失图


迭代1500次的测试集和训练集的准确度图

相关推荐:

用TensorFlow实现lasso回归和岭回归算法的示例

用TensorFlow实现戴明回归算法的示例


以上就是详解用TensorFlow实现逻辑回归算法的详细内容,更多请关注Gxl网其它相关文章!

人气教程排行