Preprocessing Train

mail@pastecode.io avatar
unknown
python
2 months ago
2.9 kB
11
Indexable
Never
import os
import re
import pandas as pd
from sklearn.model_selection import train_test_split

# Filesystem locations used by this script. The values are placeholders / machine-specific
# and need to be adjusted before running on another machine.
# Presumably the directory of the saved TensorFlow model; not read by the active code shown here.
savedModelPath = 'path/to/saved/model'
# Presumably the output file for the converted .tflite model; not read by the active code shown here.
tfliteModelPath = 'path/where/to/save/my_model.tflite'
# Directory that is listed for recorded .csv files further down in this script.
recordedDataPath = 'C:/Users/Jonny/Desktop/Testdata for Python_Recordings'


# # Converts my TensorFlow model to a TFLite version for use in a Java environment
# # (requires `import tensorflow as tf`).
# converter = tf.lite.TFLiteConverter.from_saved_model(savedModelPath)
# tflite_model = converter.convert()
#
# # Saves the TFLite model to the configured path.
# with open(tfliteModelPath, 'wb') as f:
#     f.write(tflite_model)


# Binary label for a recording: 1 for the motion being trained ('sittingdown'), 0 for anything else
def label_encoding(motionname):
    """Return 1 if *motionname* equals 'sittingdown' when lower-cased, otherwise 0."""
    isTargetMotion = motionname.lower() == 'sittingdown'
    return int(isTargetMotion)


# Organize data for learning: one feature dataframe and one single-row label dataframe
# are collected per recording file.
featureDataFrames = []
labelDataFrames = []
dataFrame = None
label = None
motionName = None

# Recording files are expected to be named "<motion name>_<number>.csv".
# Compiled once here instead of on every loop iteration.
recordingNamePattern = re.compile(r'^([\w\s]+)?_\d+\.csv$')

# os.listdir() returns entries in arbitrary order; sorting keeps the sample order (and with it
# any later seeded train/test split) reproducible across runs and machines.
for filename in sorted(os.listdir(recordedDataPath)):
    # Only .csv files are candidates.
    if not filename.endswith(".csv"):
        continue

    match = recordingNamePattern.match(filename)
    # The name group is optional in the pattern, so a file such as "_1.csv" matches without a
    # motion name; skip it instead of handing None to label_encoding (which would raise).
    if match is None or match.group(1) is None:
        continue

    motionName = match.group(1)
    label = label_encoding(motionName)

    filePath = os.path.join(recordedDataPath, filename)

    # Width (number of comma-separated fields) of the longest line, needed to pad shorter rows.
    # Read as bytes so counting separators cannot fail on the platform's default text encoding;
    # default=0 covers a completely empty file.
    with open(filePath, 'rb') as file:
        maxLength = max((line.count(b',') + 1 for line in file), default=0)
    if maxLength == 0:
        # Nothing recorded in this file, so there is nothing to learn from.
        continue

    # Passing `names` with the maximum width makes pandas fill short rows with NaN instead of
    # failing on rows of differing length.
    # NOTE(review): the first line is skipped to match the original read_csv call, which consumed
    # it as a header row. If the recordings have no header line, drop skiprows=1 so the first
    # sample is kept.
    dataFrame = pd.read_csv(filePath, header=None, names=range(maxLength), skiprows=1)

    # First column is the sensor name, second the timestamp; every remaining column carries
    # the (shared) label 'sensorvalues'.
    columnCount = dataFrame.shape[1]
    dataFrame.columns = (['sensorname', 'timestamp'][:columnCount]
                         + ['sensorvalues'] * max(columnCount - 2, 0))

    # Create a dataframe with the fitting label for the features
    dataFrameLabel = pd.DataFrame({'label': label}, index=[0])

    # Replace the NaN padding with 0.0 and store feature and label dataframes side by side.
    paddedDataFrame = dataFrame.fillna(0.0)
    featureDataFrames.append(paddedDataFrame)
    labelDataFrames.append(dataFrameLabel)

# Start from empty containers; they are replaced by the split result below.
xTrain, yTrain, xTest, yTest = [], [], [], []

# Hold out 20% of the collected feature/label pairs for testing; the fixed random_state
# makes the split repeatable for the same input order.
xTrain, xTest, yTrain, yTest = train_test_split(
    featureDataFrames,
    labelDataFrames,
    test_size=0.2,
    random_state=29,
)

# Report the size of each partition (one number per line), then dump the training labels
# and the training features.
print(len(xTrain), len(xTest), len(yTrain), len(yTest), sep='\n')
print(yTrain)
print(xTrain)
Leave a Comment