tensorflow和keras通用数据集的制作(2)

lijingle 深度学习 2020-12-19 09:55 2107人围观

前面的博客写过用tensorflow2.0制作自己的数据集，经过验证是可以使用的，也是按照官方网站上的例子修改完成的，但是用起来并不是特别方便：数据集只有一个train，而很多源代码使用的是trainx和trainy两个值作为输入。下面就为大家介绍一下通用数据集的制作方法：

具体代码如下：

def _load_image(img_path):
    """Read one image file and return it resized and converted by img_to_array.

    Raises:
        ValueError: if cv2.imread cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise ValueError("Could not read image: %s" % img_path)
    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(img, (img_width, img_high))
    return img_to_array(resized)


def _build_split(listing, num_classes):
    """Load every listed image and return (scaled images, one-hot labels).

    Args:
        listing: sequence of ``(directory, file_names, label)`` tuples.
        num_classes: number of classes passed to ``to_categorical``.
    """
    images = []
    labels = []
    for dir_path, img_names, label in listing:
        for img_name in img_names:
            images.append(_load_image(os.path.join(dir_path, img_name)))
            labels.append(label)

    x = np.array(images).astype('float32')
    x /= 255  # scale pixel values by 1/255
    y = keras.utils.to_categorical(np.array(labels), num_classes)
    return x, y


def DataSet():
    """Load the train and test images of the three classes from disk.

    Each class lives in its own directory: ``pos`` is labelled 0, ``neg`` is
    labelled 1 and ``large`` is labelled 2. Every image is resized to
    ``(img_width, img_high)`` (module-level names defined elsewhere in the
    file), converted with ``img_to_array`` and divided by 255. Labels are
    converted with ``keras.utils.to_categorical``.

    Returns:
        tuple: ``(X_train, Y_train, X_test, Y_test)``. Samples are in
        directory order (pos, neg, large), so they should be shuffled
        before training.

    Raises:
        ValueError: if a listed file cannot be read as an image.
    """
    # NOTE(review): the two "large" paths contain runs of spaces and look like
    # placeholders -- confirm the real directories before running.
    train_path_pos = '/home/lijingle/deep_work/deeptest/TFRecord/Keras_dense/data/pos/'
    train_path_neg = '/home/lijingle/deep_work/deeptest/TFRecord/Keras_dense/data/neg/'
    train_path_large = '/home/deeptest/TFRecord/Keras_dense/data/large/               /'
    test_path_pos = '/home/lijingle/deep_work/deeptest/TFRecord/Keras_dense/test/pos/'
    test_path_neg = '/home/lijingle/deep_work/deeptest/TFRecord/Keras_dense/test/neg/'
    test_path_large = '/home/              /'

    num_classes = 3
    train_dirs = ((train_path_pos, 0), (train_path_neg, 1), (train_path_large, 2))
    test_dirs = ((test_path_pos, 0), (test_path_neg, 1), (test_path_large, 2))

    # List all six directories up front so that a missing directory fails
    # before any image has been decoded. Each listing keeps its own directory,
    # so "large" images are read from the "large" directory (they were
    # previously looked up in the "neg" directory).
    train_listing = [(path, os.listdir(path), label) for path, label in train_dirs]
    test_listing = [(path, os.listdir(path), label) for path, label in test_dirs]

    X_train, Y_train = _build_split(train_listing, num_classes)
    X_test, Y_test = _build_split(test_listing, num_classes)

    total_input = len(X_train)
    print("Total Train Data : %d" %total_input)
    return X_train,Y_train,X_test,Y_test
# Load both splits and report their shapes.
X_train,Y_train,X_test,Y_test = DataSet()
print('X_train shape : ',X_train.shape)
print('Y_train shape : ',Y_train.shape)
print('X_test shape : ',X_test.shape)
print('Y_test shape : ',Y_test.shape)

# Shuffle samples and labels together. A single shuffled index list is applied
# to both arrays so every image keeps its label. The right-hand side is a
# fancy-indexed copy, so writing it back in place is safe. This also works for
# an empty split, where unpacking zip(*pairs) would raise.
train_order = list(range(len(X_train)))
random.shuffle(train_order)
X_train[:] = X_train[train_order]
Y_train[:] = Y_train[train_order]
print(X_train[:2])
print(Y_train[:10])

test_order = list(range(len(X_test)))
random.shuffle(test_order)
X_test[:] = X_test[test_order]
Y_test[:] = Y_test[test_order]
print(X_test[:2])
print(Y_test[:10])
# # model

解释下：其中有两个地方是很坑的，其一为：

#问题
resizeImg = cv2.resize(img, (img_width,img_high)) 中的目标尺寸一定要先写宽、再写高，即 (宽, 高)。这和图像数组 shape 的 (高, 宽) 顺序正好相反。(这都是泪啊，三天解决的)

其二是获取数据后要进行打乱操作：

# Shuffle samples and labels together: one shuffled index list is applied to
# both arrays so every image keeps its label. The right-hand side is a
# fancy-indexed copy, so writing it back in place is safe, and an empty split
# is handled without error.
train_order = list(range(len(X_train)))
random.shuffle(train_order)
X_train[:] = X_train[train_order]
Y_train[:] = Y_train[train_order]
print(X_train[:2])
print(Y_train[:10])

test_order = list(range(len(X_test)))
random.shuffle(test_order)
X_test[:] = X_test[test_order]
Y_test[:] = Y_test[test_order]
print(X_test[:2])
print(Y_test[:10])

以上是数据打乱的部分，其中包含一些打印语句。这一步同样很重要。完整代码见下方：

keras_data11.zip