Mango Classification | Kaggle
Hello, my name is Kishan Tongrao. Today we will work through an image classification problem on Kaggle.
About Dataset:
MangoFruitDDS is a dataset of mango fruit diseases containing 1700 images of 224×224 pixels in JPG format. The dataset contains images of four diseases, namely Alternaria, Anthracnose, Black Mould Rot, and Stem End Rot. An additional category in the dataset is healthy fruits. The fruit images were captured in an orchard located in Senegal, using a mobile phone camera.
Two versions are provided: SenMangoFruitDDS_original contains the original images, and SenMangoFruitDDS_bgremoved contains the same images with the background removed.
# load dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
# load and create labeled data
# Root folder of the dataset; each sub-directory is treated as one class and
# its name is used as the label for the images it contains.
data_dir = '/kaggle/input/mangofruitdds/MangoFruitDDS/SenMangoFruitDDS_original'
batch_size = 32
epochs = 20
input_shape = (224, 224, 3)

image_paths = []
labels = []
# os.listdir returns entries in arbitrary order, so both listings are sorted
# to keep the seeded train/test split reproducible across machines.
for category in sorted(os.listdir(data_dir)):
    category_dir = os.path.join(data_dir, category)
    if not os.path.isdir(category_dir):
        # Skip stray files sitting next to the class folders.
        continue
    for image_filename in sorted(os.listdir(category_dir)):
        # Case-insensitive match so '.JPG' / '.jpeg' files are not dropped.
        if image_filename.lower().endswith(('.jpg', '.jpeg')):
            image_path = os.path.join(category_dir, image_filename)
            image_paths.append(image_path)
            labels.append(category)

# Fail early with a clear message instead of an obscure error further down.
if not image_paths:
    raise ValueError(f'No JPEG images found under {data_dir!r}')
# split dataset
# Hold out 20% of the images for testing. The split is stratified on the
# label so every class keeps the same proportion in both subsets; the fixed
# random_state makes the split repeatable.
train_image_paths, test_image_paths, train_labels, test_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)
# create data generator for training
# Random augmentations applied on the fly to each training image; pixel
# values are also multiplied by 1/255.
augmentation_options = dict(
    rescale=1.0 / 255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Two-column table (file path, class name) that the generator reads from.
train_frame = pd.DataFrame({'image_path': train_image_paths, 'label': train_labels})

# Yields batches of resized images together with one-hot encoded labels.
train_generator = train_datagen.flow_from_dataframe(
    train_frame,
    x_col='image_path',
    y_col='label',
    target_size=input_shape[:2],
    batch_size=batch_size,
    class_mode='categorical',
)
# build model
# Three convolution + max-pooling stages with 32, 64 and 128 filters,
# followed by a dense classifier head with dropout.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),  # 5 classes (4 diseases + 1 healthy)
])
# compile
# Categorical cross-entropy matches the one-hot labels and softmax output.
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases reject.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)
# check point
# Keep the best model seen so far on disk. The training call in this script
# passes no validation data, so 'val_loss' is never logged and a checkpoint
# monitoring it would be skipped every epoch; monitor the training loss
# instead.
checkpoint = ModelCheckpoint(
    'mango_fruit_disease_model.h5',
    monitor='loss',
    save_best_only=True,
    verbose=1,
)
# train model
# Number of full batches available per epoch (integer division, so a
# trailing partial batch is not counted).
full_batches_per_epoch = train_generator.samples // batch_size

# Run training; `history` keeps the per-epoch metrics returned by fit().
history = model.fit(
    train_generator,
    steps_per_epoch=full_batches_per_epoch,
    epochs=epochs,
    callbacks=[checkpoint],
)
# save model
# Write the model as it stands after the last epoch.
# NOTE: this is the same path the ModelCheckpoint callback is configured
# with, so this call replaces any checkpoint written during training.
final_model_path = 'mango_fruit_disease_model.h5'
model.save(final_model_path)
# evaluate on test data
# No augmentation at test time, only the same 1/255 rescaling as training.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_frame = pd.DataFrame({'image_path': test_image_paths, 'label': test_labels})

# Reuse the training generator's class order so each label maps to the same
# output index the model was trained with, even if a class happens to be
# missing from the test subset.
training_class_order = sorted(
    train_generator.class_indices,
    key=train_generator.class_indices.get,
)

test_generator = test_datagen.flow_from_dataframe(
    test_frame,
    x_col='image_path',
    y_col='label',
    target_size=input_shape[:2],
    batch_size=batch_size,
    class_mode='categorical',
    classes=training_class_order,
    shuffle=False,  # fixed order keeps evaluation deterministic
)

# evaluate() returns the loss followed by the metrics given to compile().
loss, accuracy = model.evaluate(test_generator)
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy * 100:.2f}%')
Thanks!