Preprocess and MLmodel
unknown
python
2 years ago
3.7 kB
15
Indexable
import os
import re
import pandas as pd
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import train_test_split
# Placeholder paths for the (currently disabled) TFLite export step below.
saved_model_path = 'path/to/saved/model'
tflite_model_path = 'path/where/to/save/my_model.tflite'
# Directory containing the recorded sensor CSV files used for training.
recorded_data_path = 'C:/Users/Jonny/Desktop/TestingSpin'
# NOTE(review): dead code kept for reference — the TF -> TFLite conversion is
# disabled and references names (tf, savedModelDir, tfliteModelPath,
# tfliteModel) that do not exist in this file; re-check before re-enabling.
# # Converts my Tensorflow model to a Tflite version for use in java environment
# converter = tf.lite.TFLiteConverter.from_saved_model(savedModelDir)
# tfliteModel = converter.convert()
#
# # Saves the tflite model to specific dir.
# with open(tfliteModelPath, 'wb') as f:
# f.write(tfliteModel)
# Returns 1 if the motionName matches the motion I want to train for
def label_encoding(motionname):
    """Binary label for a motion name: 1 for 'spin' (case-insensitive), else 0."""
    if motionname.lower() == 'spin':
        return 1
    return 0
# Organize data for learning: one feature DataFrame and one matching label
# DataFrame per recorded CSV file.
feature_data_frames = []
label_data_frames = []
# Iterates over all the files in the given dir
for filename in os.listdir(recorded_data_path):
    # Only CSV files are of interest; skip everything else early.
    if not filename.endswith(".csv"):
        continue
    # Filename convention assumed by the app: "<motion name>_<number>.csv".
    match = re.match(r'^([\w\s]+)?_\d+\.csv$', filename)
    if not match:
        continue
    # The name group is optional in the pattern ("_123.csv" matches with
    # group(1) == None); fall back to '' so label_encoding() does not crash
    # on None.lower() — a nameless file is simply labeled 0 (not 'spin').
    motion_name = match.group(1) or ''
    label = label_encoding(motion_name)
    file_path = os.path.join(recorded_data_path, filename)
    # Find the widest row so every row can be padded to the same length.
    # A generator avoids keeping the whole file in memory, and default=0
    # prevents max() from raising ValueError on an empty file.
    with open(file_path, 'r') as file:
        max_length = max((len(line.strip().split(',')) for line in file), default=0)
    # An empty file yields no rows — nothing to learn from, skip it.
    if max_length == 0:
        continue
    # Read the csv into a DataFrame; names=range(max_length) pads every row
    # to max_length columns (missing cells become NaN).
    data_frame = pd.read_csv(file_path, header=None, names=range(max_length))
    # Column 0 is the sensor name, column 1 the timestamp; all remaining
    # columns share the 'sensorvalues' name (duplicated on purpose, as in
    # the original layout).
    data_frame.columns = ['sensorname' if i == 0 else 'timestamp' if i == 1 else 'sensorvalues'
                          for i in range(data_frame.shape[1])]
    # Replace the NaN padding with 0.0 so rows are numerically complete.
    padded_data_frame = data_frame.fillna(0.0)
    # One label row per feature row, so features and labels stay aligned.
    data_frame_label = pd.DataFrame({'label': [label] * len(padded_data_frame)})
    feature_data_frames.append(padded_data_frame)
    label_data_frames.append(data_frame_label)
# Stack the per-file DataFrames into one feature table and one label table,
# keeping each file's original row indexes.
concat_feature = pd.concat(feature_data_frames)
concat_label = pd.concat(label_data_frames)
# Hold out 20% of the data as the test set. shuffle=False keeps file order,
# so the split is deterministic regardless of random_state.
x_train, x_test, y_train, y_test = train_test_split(
    concat_feature, concat_label, test_size=0.2, random_state=29, shuffle=False)
for part in (x_train, y_train, x_test, y_test):
    print(part)
# scikit-learn expects 1-D label arrays, not single-column DataFrames.
y_train = y_train.to_numpy().ravel()
y_test = y_test.to_numpy().ravel()
# Fit a linear-kernel SVM on the training split (fit returns the estimator).
clf = svm.SVC(kernel='linear').fit(x_train, y_train)
# Evaluate on the held-out split.
y_prediction = clf.predict(x_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_prediction))
print("Precision:", metrics.precision_score(y_test, y_prediction))
print("Recall:", metrics.recall_score(y_test, y_prediction))
Editor is loading...
Leave a Comment