# Preprocessing Train
# unknown
# python
# 2 years ago
# 2.9 kB
# 33
# Indexable
import os
import re
import pandas as pd
from sklearn.model_selection import train_test_split
# Placeholder path; presumably the directory of the trained TensorFlow SavedModel
# (only referenced by the disabled conversion snippet below) -- TODO confirm.
savedModelPath = 'path/to/saved/model'
# Placeholder path; presumably where the converted .tflite file should be written
# (only referenced by the disabled conversion snippet below) -- TODO confirm.
tfliteModelPath = 'path/where/to/save/my_model.tflite'
# Directory that is scanned for recorded .csv files further down in this script.
recordedDataPath = 'C:/Users/Jonny/Desktop/Testdata for Python_Recordings'
# Disabled: converts the TensorFlow model to a TFLite version for use in a Java environment.
# NOTE(review): re-enabling this snippet requires `import tensorflow as tf`; the variable
# names have been aligned with the path constants defined above.
# converter = tf.lite.TFLiteConverter.from_saved_model(savedModelPath)
# tflite_model = converter.convert()
#
# # Saves the TFLite model to tfliteModelPath.
# with open(tfliteModelPath, 'wb') as f:
#     f.write(tflite_model)
def label_encoding(motionname):
    """Encode a motion name as the binary label used for training.

    Args:
        motionname: Name of the recorded motion, or None when no name is
            available (the filename pattern used by this script makes the
            name part optional, so a match can yield None).

    Returns:
        1 if ``motionname`` equals 'sittingdown' ignoring case, otherwise 0.
        None is treated as "not the target motion" and yields 0.
    """
    # Guard first: calling .lower() on None would raise AttributeError.
    if motionname is None:
        return 0
    return 1 if motionname.lower() == 'sittingdown' else 0
# Organize data for learning

# Compiled once instead of on every loop iteration. A recording file is
# expected to be named "<motion name>_<number>.csv"; the name part is optional.
recordingPattern = re.compile(r'^([\w\s]+)?_\d+\.csv$')


def _load_padded_recording(filePath, skipFirstLine=True):
    """Read one recording .csv into a zero-padded feature dataframe.

    Rows may contain different numbers of comma-separated fields. The widest
    row decides the number of columns; missing cells are filled with 0.0.

    Args:
        filePath: Path of the .csv file to read.
        skipFirstLine: When True (default) the first line of the file is not
            returned as data. This keeps the earlier behaviour, where
            ``pd.read_csv`` consumed the first line as the header row.
            NOTE(review): if the recordings have no header line, pass False
            so the first sample is not lost -- confirm against the app's
            export format.

    Returns:
        The padded dataframe whose columns are named 'sensorname',
        'timestamp' and then 'sensorvalues' for every remaining column, or
        None when the file contains no lines at all.
    """
    # utf-8 is also the default encoding pandas.read_csv uses for this file.
    with open(filePath, 'r', encoding='utf-8') as file:
        fieldCounts = [len(line.strip().split(',')) for line in file]
    if not fieldCounts:
        # max() of an empty sequence would raise; there is nothing to load.
        return None
    maxLength = max(fieldCounts)

    # One placeholder name per column of the widest row lets read_csv accept
    # rows that are longer than the first one instead of raising ParserError.
    dataFrame = pd.read_csv(
        filePath,
        header=None,
        names=list(range(maxLength)),
        skiprows=1 if skipFirstLine else 0,
    )
    columnCount = dataFrame.shape[1]
    # The repeated 'sensorvalues' name is kept as in the original layout.
    dataFrame.columns = (['sensorname', 'timestamp'][:columnCount]
                         + ['sensorvalues'] * max(columnCount - 2, 0))
    return dataFrame.fillna(0.0)


featureDataFrames = []
labelDataFrames = []

# Iterates over all the files in the given dir. Sorted because os.listdir
# returns entries in arbitrary order, which would make the split below differ
# between runs or machines despite the fixed random_state.
for filename in sorted(os.listdir(recordedDataPath)):
    # Only .csv files whose name matches the recording pattern are used.
    if not filename.endswith(".csv"):
        continue
    match = recordingPattern.match(filename)
    if not match:
        continue

    # The name group is optional in the pattern, so it is None for a file
    # such as "_1.csv"; fall back to an empty name (encoded as label 0).
    motionName = match.group(1) or ''
    label = label_encoding(motionName)
    filePath = os.path.join(recordedDataPath, filename)

    dataFrame = _load_padded_recording(filePath)
    if dataFrame is None:
        # Completely empty file: skip it.
        continue

    # One feature dataframe and one single-row label dataframe per file.
    featureDataFrames.append(dataFrame)
    labelDataFrames.append(pd.DataFrame({'label': label}, index=[0]))

# With test_size=0.2 at least two samples are needed for both the train and
# the test set to be non-empty; fail with a message that names the cause.
if len(featureDataFrames) < 2:
    raise ValueError(
        f"Need at least 2 matching recordings in {recordedDataPath!r} "
        f"to create a train/test split, found {len(featureDataFrames)}"
    )

# Splitting the padded feature and label data into test and train cases
xTrain, xTest, yTrain, yTest = train_test_split(
    featureDataFrames, labelDataFrames, test_size=0.2, random_state=29)

print(len(xTrain))
print(len(xTest))
print(len(yTrain))
print(len(yTest))
print(yTrain)
print(xTrain)
# Editor is loading...
# Leave a Comment