My solutions to Harvard's online course CS50AI, An Introduction to Machine Learning
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
3.2 KiB

4 years ago
  1. import cv2
  2. import numpy as np
  3. import os
  4. import sys
  5. import tensorflow as tf
  6. from sklearn.model_selection import train_test_split
  7. EPOCHS = 10
  8. IMG_WIDTH = 30
  9. IMG_HEIGHT = 30
  10. NUM_CATEGORIES = 43
  11. TEST_SIZE = 0.4
  12. def main():
  13. # Check command-line arguments
  14. if len(sys.argv) not in [2, 3]:
  15. sys.exit("Usage: python traffic.py data_directory [model.h5]")
  16. # Get image arrays and labels for all image files
  17. images, labels = load_data(sys.argv[1])
  18. # Split data into training and testing sets
  19. labels = tf.keras.utils.to_categorical(labels)
  20. x_train, x_test, y_train, y_test = train_test_split(
  21. np.array(images), np.array(labels), test_size=TEST_SIZE
  22. )
  23. # Get a compiled neural network
  24. model = get_model()
  25. # Fit model on training data
  26. model.fit(x_train, y_train, epochs=EPOCHS)
  27. # Evaluate neural network performance
  28. model.evaluate(x_test, y_test, verbose=2)
  29. # Save model to file
  30. if len(sys.argv) == 3:
  31. filename = sys.argv[2]
  32. model.save(filename)
  33. print(f"Model saved to {filename}.")
  34. def load_data(data_dir):
  35. """
  36. Load image data from directory `data_dir`.
  37. Assume `data_dir` has one directory named after each category, numbered
  38. 0 through NUM_CATEGORIES - 1. Inside each category directory will be some
  39. number of image files.
  40. Return tuple `(images, labels)`. `images` should be a list of all
  41. of the images in the data directory, where each image is formatted as a
  42. numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
  43. be a list of integer labels, representing the categories for each of the
  44. corresponding `images`.
  45. """
  46. categories = os.listdir(data_dir)
  47. labels = []
  48. images = []
  49. for i in range(NUM_CATEGORIES):
  50. imgs = os.listdir(os.path.join(data_dir, str(i)))
  51. for j in imgs:
  52. img = cv2.imread(os.path.join(data_dir, str(i), j))
  53. resized = cv2.resize(img, (int(IMG_WIDTH),int(IMG_HEIGHT)))
  54. images.append(resized)
  55. labels.append(i)
  56. return (images, labels, )
  57. def get_model():
  58. """
  59. Returns a compiled convolutional neural network model. Assume that the
  60. `input_shape` of the first layer is `(IMG_WIDTH, IMG_HEIGHT, 3)`.
  61. The output layer should have `NUM_CATEGORIES` units, one for each category.
  62. """
  63. DROPOUT = 0.5
  64. CONV_LAYER_SIZE = (5, 5)
  65. CONV_LAYER_NUM = 32
  66. POOL_SIZE = (2, 2)
  67. model = tf.keras.models.Sequential([
  68. tf.keras.layers.Conv2D(
  69. CONV_LAYER_NUM, CONV_LAYER_SIZE, activation="relu", input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
  70. ),
  71. tf.keras.layers.MaxPooling2D(pool_size=POOL_SIZE),
  72. tf.keras.layers.Flatten(),
  73. tf.keras.layers.Dense(64, activation="relu"),
  74. tf.keras.layers.Dense(32, activation="relu"),
  75. tf.keras.layers.Dropout(DROPOUT),
  76. tf.keras.layers.Dense(NUM_CATEGORIES, activation="softmax")
  77. ])
  78. model.compile(
  79. optimizer="adam",
  80. loss="categorical_crossentropy",
  81. metrics=["accuracy"]
  82. )
  83. return model
  84. if __name__ == "__main__":
  85. main()