Preprocess and MLmodel
unknown
python
a year ago
3.7 kB
5
Indexable
"""Load recorded motion-sensor CSVs, label them, and train a linear SVM.

Each CSV in ``recorded_data_path`` is named ``<motion name>_<number>.csv``.
Rows have varying length, so every file is padded to its longest row before
the frames are concatenated and split into train/test sets.
"""
import os
import re

import pandas as pd
from sklearn import metrics, svm
from sklearn.model_selection import train_test_split

saved_model_path = 'path/to/saved/model'
tflite_model_path = 'path/where/to/save/my_model.tflite'
recorded_data_path = 'C:/Users/Jonny/Desktop/TestingSpin'

# # Converts my Tensorflow model to a Tflite version for use in java environment
# converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
# tflite_model = converter.convert()
#
# # Saves the tflite model to specific dir.
# with open(tflite_model_path, 'wb') as f:
#     f.write(tflite_model)


def label_encoding(motionname):
    """Return 1 if motionname is the motion we train for ('spin'), else 0."""
    return 1 if motionname.lower() == 'spin' else 0


# Matches the feature-data filenames produced by the recording app:
# "<motion name>_<number>.csv". Compiled once instead of per iteration.
FILENAME_PATTERN = re.compile(r'^([\w\s]+)?_\d+\.csv$')

# Per-file feature frames and their matching per-row label frames.
feature_data_frames = []
label_data_frames = []

for filename in os.listdir(recorded_data_path):
    # Guard clauses: skip anything that is not a recording CSV.
    if not filename.endswith('.csv'):
        continue
    match = FILENAME_PATTERN.match(filename)
    if not match:
        continue

    # Encode the label from the motion name embedded in the filename.
    label = label_encoding(match.group(1))
    file_path = os.path.join(recorded_data_path, filename)

    # Rows have varying length; find the longest row in a single streaming
    # pass so read_csv can pad every row to the same width.
    with open(file_path, 'r') as file:
        max_length = max(len(line.strip().split(',')) for line in file)

    data_frame = pd.read_csv(file_path, header=None, names=range(max_length))
    # Column 0 is the sensor name, column 1 the timestamp; all remaining
    # columns hold sensor values (the duplicate labels are intentional).
    data_frame.columns = ['sensorname' if i == 0 else
                          'timestamp' if i == 1 else
                          'sensorvalues'
                          for i in range(data_frame.shape[1])]

    # Short rows were padded with NaN by read_csv; replace those with 0.0.
    padded_data_frame = data_frame.fillna(0.0)

    feature_data_frames.append(padded_data_frame)
    # One label per feature row.
    label_data_frames.append(
        pd.DataFrame({'label': [label] * len(padded_data_frame)}))

# Fail with a clear message instead of pd.concat's generic ValueError when
# the directory contains no matching recordings.
if not feature_data_frames:
    raise FileNotFoundError(
        f'No recording .csv files matching the expected naming scheme '
        f'were found in {recorded_data_path}')

# Concatenate into one big frame each, preserving the pre-existing indexes.
concat_feature = pd.concat(feature_data_frames)
concat_label = pd.concat(label_data_frames)

# shuffle=False keeps each recording's rows contiguous across the split
# (a random_state would be ignored when shuffling is disabled).
x_train, x_test, y_train, y_test = train_test_split(
    concat_feature, concat_label, test_size=0.2, shuffle=False)

print(x_train)
print(y_train)
print(x_test)
print(y_test)

# Flatten the single-column label frames to 1-D arrays for scikit-learn.
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

# SVM setup, fitting and evaluation.
clf = svm.SVC(kernel='linear')
clf.fit(x_train, y_train)
y_prediction = clf.predict(x_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_prediction))
print("Precision:", metrics.precision_score(y_test, y_prediction))
print("Recall:", metrics.recall_score(y_test, y_prediction))
Editor is loading...
Leave a Comment