This document walks through extracting global features from plant leaf images for disease classification. Preprocessing covers segmentation and color-space conversion; the extracted features are a color histogram, Haralick texture, and Hu moments. The features are normalized, stored in HDF5 format, and several machine learning models are trained and evaluated with 10-fold cross-validation.


pip install mahotas

Requirement already satisfied: mahotas in /usr/local/lib/python3.10/dist-packages (1.4.14)


Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mahotas) (1.25.2)

#-----------------------------------
# GLOBAL FEATURE EXTRACTION
#-----------------------------------
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py

#--------------------
# tunable-parameters
#--------------------
images_per_class = 800
fixed_size = tuple((500, 500))
train_path = r"/content/drive/MyDrive/sharunplant/archive/dataset/train"
h5_train_data = r"/content/drive/MyDrive/sharunplant/train_data.h5/train_data.h5"
h5_train_labels = r"/content/drive/MyDrive/sharunplant/train_labels.h5"
bins = 8
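
# The source never shows the dataset tree explicitly, but the read loop below
# (class folders named by label, files named 1.jpg .. 800.jpg) implies a layout
# like this -- an inferred sketch, not a guarantee:
#
#   /content/drive/MyDrive/sharunplant/archive/dataset/train/
#   ├── diseased/   1.jpg ... 800.jpg
#   └── healthy/    1.jpg ... 800.jpg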

# convert an image from OpenCV's default BGR channel order to RGB

def rgb_bgr(image):
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_img

# convert an RGB image to HSV (despite the name, the input here is RGB)

def bgr_hsv(rgb_img):
    hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)
    return hsv_img

# image segmentation: keep the green (healthy) and brown (diseased) regions

def img_segmentation(rgb_img, hsv_img):
    # HSV range for green, healthy tissue
    lower_green = np.array([25, 0, 20])
    upper_green = np.array([100, 255, 255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    result = cv2.bitwise_and(rgb_img, rgb_img, mask=healthy_mask)
    # HSV range for brown, diseased tissue
    lower_brown = np.array([10, 0, 10])
    upper_brown = np.array([30, 255, 255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)
    disease_result = cv2.bitwise_and(rgb_img, rgb_img, mask=disease_mask)
    # combine the two masks and apply the result to the RGB image
    final_mask = healthy_mask + disease_mask
    final_result = cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
    return final_result
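
# A quick way to sanity-check the two HSV thresholds is to plot the masks side
# by side. This is a minimal sketch, not part of the original pipeline; the
# image path is a placeholder for any leaf image on disk.
import matplotlib.pyplot as plt

img = cv2.imread("sample_leaf.jpg")  # hypothetical sample image
rgb = rgb_bgr(cv2.resize(img, fixed_size))
hsv = bgr_hsv(rgb)
healthy = cv2.inRange(hsv, np.array([25, 0, 20]), np.array([100, 255, 255]))
diseased = cv2.inRange(hsv, np.array([10, 0, 10]), np.array([30, 255, 255]))
for i, (title, m) in enumerate([("healthy mask", healthy), ("disease mask", diseased)]):
    plt.subplot(1, 2, i + 1)
    plt.imshow(m, cmap="gray")
    plt.title(title)
plt.show()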

# feature-descriptor-1: Hu Moments (7 shape features)
def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

# feature-descriptor-2: Haralick Texture (13 features, averaged over the 4 directions)
def fd_haralick(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick

# feature-descriptor-3: Color Histogram (bins**3 = 512 values)
def fd_histogram(image, mask=None):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins],
                        [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()
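
# These three descriptors have fixed lengths, which is where the 532-dimensional
# feature vector reported later comes from: 8**3 = 512 histogram bins, 13 Haralick
# statistics, and 7 Hu moments. A quick check on a hypothetical random image:
rng = np.random.default_rng(0)
dummy = rng.integers(0, 256, size=(100, 100, 3), dtype=np.uint8)
print(len(fd_histogram(dummy)))   # 512 = bins ** 3
print(len(fd_haralick(dummy)))    # 13 Haralick statistics
print(len(fd_hu_moments(dummy)))  # 7 Hu moments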

# get the training labels
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()
print(train_labels)

['diseased', 'healthy']

# empty lists to hold feature vectors and labels
global_features = []
labels = []

# loop over the training data sub-folders
for training_name in train_labels:
    # join the training data path and each class's training folder
    dir = os.path.join(train_path, training_name)

    # get the current training label
    current_label = training_name

    # loop over the images in each sub-folder
    for x in range(1, images_per_class + 1):
        # get the image file name
        file = dir + "/" + str(x) + ".jpg"

        # read the image and resize it to a fixed size
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)

        # run the preprocessing pipeline step by step
        RGB_BGR = rgb_bgr(image)
        BGR_HSV = bgr_hsv(RGB_BGR)
        IMG_SEGMENT = img_segmentation(RGB_BGR, BGR_HSV)

        # call the global feature descriptors
        fv_hu_moments = fd_hu_moments(IMG_SEGMENT)
        fv_haralick = fd_haralick(IMG_SEGMENT)
        fv_histogram = fd_histogram(IMG_SEGMENT)

        # concatenate the three descriptors into one feature vector
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

        # update the lists of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)

    print("[STATUS] processed folder: {}".format(current_label))

print("[STATUS] completed Global Feature Extraction...")

[STATUS] processed folder: diseased
[STATUS] processed folder: healthy
[STATUS] completed Global Feature Extraction...

# get the overall feature vector size


print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

[STATUS] feature vector size (1600, 532)

# get the overall training label size


print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] training Labels (1600,)


# encode the target labels
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...
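
# LabelEncoder assigns integer codes in sorted order, so here 'diseased' maps
# to 0 and 'healthy' to 1 -- which is exactly what the final prediction check
# at the end of this document relies on. A quick sketch to see the mapping:
print(dict(zip(le.classes_, le.transform(le.classes_))))
# {'diseased': 0, 'healthy': 1}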

# scale features in the range (0-1)


scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...

print("[STATUS] target labels: {}".format(target))


print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [0 0 0 ... 1 1 1]


[STATUS] target labels shape: (1600,)

# close any previously opened file handle before re-creating the file
if 'h5f_data' in locals() and isinstance(h5f_data, h5py.File):
    h5f_data.close()

# create the HDF5 file for the feature vectors
h5f_data = h5py.File(h5_train_data, 'w')
h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

<HDF5 dataset "dataset_1": shape (1600, 532), type "<f8">

h5f_label = h5py.File(h5_train_labels, 'w')


h5f_label.create_dataset('dataset_1', data=np.array(target))

<HDF5 dataset "dataset_1": shape (1600,), type "<i8">

h5f_data.close()
h5f_label.close()
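
# As a variant (a sketch, not in the source), the same files can be re-opened
# with context managers, so the handles are closed even if an error occurs.
# This is functionally the same as the explicit open/close used in the
# training section below.
with h5py.File(h5_train_data, 'r') as f:
    features_check = np.array(f['dataset_1'])
with h5py.File(h5_train_labels, 'r') as f:
    labels_check = np.array(f['dataset_1'])
print(features_check.shape, labels_check.shape)  # (1600, 532) (1600,)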

# training
#-----------------------------------
# TRAINING OUR MODEL
#-----------------------------------
import h5py
import numpy as np
import os
import glob
import cv2
import warnings
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# sklearn.externals.joblib was removed from scikit-learn; use the joblib package directly
from joblib import dump, load

warnings.filterwarnings('ignore')

#--------------------
# tunable-parameters
#--------------------
num_trees = 100
test_size = 0.20
seed = 9
train_path = r"/content/drive/MyDrive/sharunplant/archive/dataset/train"
test_path = "dataset/test"
h5_train_data =r"/content/drive/MyDrive/sharunplant/train_data.h5/train_data.h5"
h5_train_labels = r"/content/drive/MyDrive/sharunplant/train_labels.h5"
scoring = "accuracy"

# get the training labels


train_labels = os.listdir(train_path)

# sort the training labels


train_labels.sort()

if not os.path.exists(test_path):
    os.makedirs(test_path)

# create all the machine learning models


models = []
models.append(('LR', LogisticRegression(random_state=seed)))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier(random_state=seed)))
models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(random_state=seed)))

# variables to hold the results and names


results = []
names = []

# import the feature vector and trained labels


h5f_data = h5py.File(h5_train_data, 'r')
h5f_label = h5py.File(h5_train_labels, 'r')

global_features_string = h5f_data['dataset_1']
global_labels_string = h5f_label['dataset_1']

global_features = np.array(global_features_string)
global_labels = np.array(global_labels_string)

h5f_data.close()
h5f_label.close()

# verify the shape of the feature vector and labels


print("[STATUS] features shape: {}".format(global_features.shape))
print("[STATUS] labels shape: {}".format(global_labels.shape))

print("[STATUS] training started...")

[STATUS] features shape: (1600, 532)
[STATUS] labels shape: (1600,)
[STATUS] training started...

# split the training and testing data


(trainDataGlobal, testDataGlobal,
 trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features),
                                                         np.array(global_labels),
                                                         test_size=test_size,
                                                         random_state=seed)

print("[STATUS] split train and test data...")


print("Train data : {}".format(trainDataGlobal.shape))
print("Test data : {}".format(testDataGlobal.shape))

[STATUS] split train and test data...


Train data : (1280, 532)
Test data : (320, 532)
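
# Note that this split is not stratified; the test supports reported later
# (158 vs 162) show a slight class skew. An optional stratified variant, not
# in the source, keeps the 50/50 class ratio exact in both splits:
(trainDataGlobal, testDataGlobal,
 trainLabelsGlobal, testLabelsGlobal) = train_test_split(global_features,
                                                         global_labels,
                                                         test_size=test_size,
                                                         random_state=seed,
                                                         stratify=global_labels)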

print(trainDataGlobal)

[[0.97973956 0. 0. ... 0.01058888 0.12693628 0.96573219]


[0.90470961 0.03308547 0.03047729 ... 0.01058886 0.12692807 0.96573214]
[0.92166131 0.01766604 0.03559807 ... 0.01058994 0.12737134 0.96573262]
...
[0.77039234 0. 0. ... 0.01058886 0.12693296 0.96573218]
[0.96965597 0. 0. ... 0.01058989 0.1271198 0.96574232]
[0.98389776 0.19183335 0.15462199 ... 0.01058898 0.12695993 0.96573136]]
# 10-fold cross-validation loop
for name, model in models:
    kfold = KFold(n_splits=10, random_state=seed, shuffle=True)
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal,
                                 cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# boxplot algorithm comparison


fig = pyplot.figure()
fig.suptitle('Machine Learning Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()

LR: 0.916406 (0.021833)


LDA: 0.907813 (0.019702)
KNN: 0.920312 (0.012500)
CART: 0.906250 (0.027951)
RF: 0.957031 (0.013189)
NB: 0.857031 (0.010511)
SVM: 0.916406 (0.020086)

clf = RandomForestClassifier(n_estimators=num_trees, random_state=seed)

clf.fit(trainDataGlobal, trainLabelsGlobal)

RandomForestClassifier(random_state=9)

y_predict=clf.predict(testDataGlobal)

print(y_predict)

[1 1 1 0 0 1 1 1 0 0 1 1 0 0 0 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 1
1 1 0 1 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 1 1 0 0 0 1 0
0 1 0 1 1 0 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0
1 0 0 1 1 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 0 1 0 0 0 1 1 0 0 0 1 1 0 0
0 1 0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0
1 1 1 1 0 1 1 1 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 1 1 0 0 1 1 1 1 0 1 1 1 0 0
1 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 0 0 0 0 0 1 1
1 1 1 1 1 1 0 1 0 1 0 1 0 0 0 0 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1
1 1 1 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 1 0 0]

cm = confusion_matrix(testLabelsGlobal,y_predict)
import seaborn as sns
sns.heatmap(cm ,annot=True)

<Axes: >

print(classification_report(testLabelsGlobal,y_predict))

              precision    recall  f1-score   support

           0       0.99      0.97      0.98       158
           1       0.98      0.99      0.98       162

    accuracy                           0.98       320
   macro avg       0.98      0.98      0.98       320
weighted avg       0.98      0.98      0.98       320

from sklearn.metrics import accuracy_score


accuracy_score(testLabelsGlobal, y_predict)

0.98125

import joblib

# Assuming 'clf' is your trained model


joblib.dump(clf, 'trained_model.pkl')

['trained_model.pkl']
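
# One caveat worth flagging: clf was fit on MinMax-scaled features, but the
# inference code below feeds raw descriptor values straight into clf.predict.
# For consistent predictions, the fitted scaler (and optionally the
# LabelEncoder) should be persisted with the model and re-applied at inference
# time. A sketch, assuming the `scaler` and `le` objects from the
# feature-extraction section are still in scope (filenames are placeholders):
joblib.dump(scaler, 'feature_scaler.pkl')
joblib.dump(le, 'label_encoder.pkl')

# at inference time:
#   scaler = joblib.load('feature_scaler.pkl')
#   scaled = scaler.transform(sample_image_features.reshape(1, -1))
#   label = joblib.load('label_encoder.pkl').inverse_transform(clf.predict(scaled))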

import cv2
import numpy as np
import joblib

# Load the trained model


clf = joblib.load('trained_model.pkl')

# define preprocessing functions


def preprocess_image(image):
    # resize the image to the same size used during training
    image = cv2.resize(image, (500, 500))
    # the BGR->RGB conversion happens inside extract_features via rgb_bgr()
    return image

def extract_features(image):
    # extract features with the same descriptors used during training
    # (reuses rgb_bgr, bgr_hsv, img_segmentation, and the fd_* functions above)
    rgb_bgr_image = rgb_bgr(image)
    bgr_hsv_image = bgr_hsv(rgb_bgr_image)
    segmented_image = img_segmentation(rgb_bgr_image, bgr_hsv_image)
    feature_vector = np.hstack([fd_histogram(segmented_image),
                                fd_haralick(segmented_image),
                                fd_hu_moments(segmented_image)])
    return feature_vector

# Load a sample image


sample_image_path = "/content/drive/MyDrive/check/dis1.png"
sample_image = cv2.imread(sample_image_path)

# Preprocess the sample image


preprocessed_sample_image = preprocess_image(sample_image)

# Extract features from the preprocessed sample image


sample_image_features = extract_features(preprocessed_sample_image)

# Reshape features to match the input shape expected by the model


sample_image_features = sample_image_features.reshape(1, -1)

# use the loaded model to make predictions
# (note: training used MinMax-scaled features; see the scaler note after the model dump above)


prediction = clf.predict(sample_image_features)

# print the prediction (the LabelEncoder assigned 0 -> 'diseased', 1 -> 'healthy')


if prediction == 0:
    print("The sample image is predicted to be diseased.")
else:
    print("The sample image is predicted to be healthy.")

The sample image is predicted to be diseased.
