Preprocessing Train
unknown
python
a year ago
2.9 kB
24
Indexable
"""Prepare recorded motion CSV files for training.

Scans ``recordedDataPath`` for files named ``<motion name>_<number>.csv``,
turns each one into a feature dataframe plus a one-row label dataframe, and
splits the resulting lists into train and test sets.
"""
import os
import re

import pandas as pd

savedModelPath = 'path/to/saved/model'
tfliteModelPath = 'path/where/to/save/my_model.tflite'
recordedDataPath = 'C:/Users/Jonny/Desktop/Testdata for Python_Recordings'

# Recording files are expected to be named '<motion name>_<number>.csv'.
# The motion-name group is mandatory: with an optional group a file such as
# '_1.csv' matched with group(1) == None and crashed label_encoding().
recordingFilePattern = re.compile(r'^([\w\s]+)_\d+\.csv$')

# Disabled step (needs `import tensorflow as tf` before it can be enabled):
# # Converts my Tensorflow model to a Tflite version for use in java environment
# converter = tf.lite.TFLiteConverter.from_saved_model(savedModelPath)
# tflite_model = converter.convert()
#
# # Saves the tflite model to specific dir.
# with open(tfliteModelPath, 'wb') as f:
#     f.write(tflite_model)


def label_encoding(motionname):
    """Return 1 if *motionname* is the motion being trained for, else 0.

    The comparison against 'sittingdown' is case-insensitive.
    """
    return 1 if motionname.lower() == 'sittingdown' else 0


def extract_motion_name(filename):
    """Return the motion name encoded in a recording file name.

    Returns None when *filename* does not look like
    ``<motion name>_<number>.csv``.
    """
    match = recordingFilePattern.match(filename)
    return match.group(1) if match else None


def feature_column_names(columnCount):
    """Return the column names for a feature dataframe with *columnCount* columns.

    The first column is 'sensorname', the second 'timestamp', and every
    remaining column is named 'sensorvalues' (so that name may repeat).
    """
    leadingNames = ['sensorname', 'timestamp']
    return [
        leadingNames[i] if i < len(leadingNames) else 'sensorvalues'
        for i in range(columnCount)
    ]


def load_recordings(directory):
    """Load all recording CSV files found directly inside *directory*.

    Files whose name does not match ``<motion name>_<number>.csv`` are skipped.

    Returns:
        A ``(featureDataFrames, labelDataFrames)`` tuple of two lists of equal
        length. Entry *i* of the first list is the feature dataframe of one
        file (missing values replaced by 0.0); entry *i* of the second list is
        a one-row dataframe holding that file's label in a 'label' column.
    """
    featureDataFrames = []
    labelDataFrames = []

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # seeded train/test split reproducible from run to run and across machines.
    for filename in sorted(os.listdir(directory)):
        motionName = extract_motion_name(filename)
        if motionName is None:
            continue

        filePath = os.path.join(directory, filename)

        # NOTE(review): read_csv treats the first row of the file as a header,
        # which is then overwritten by the names below. If the recordings have
        # no header row, the first sample is lost here - confirm the format.
        # NOTE(review): an earlier pre-scan computed the longest row of the
        # file but never used the result, so it was removed. If rows can have
        # differing field counts, read_csv may need explicit column names.
        dataFrame = pd.read_csv(filePath)
        dataFrame.columns = feature_column_names(dataFrame.shape[1])

        # Missing values are filled with 0.0 so every cell holds a number.
        featureDataFrames.append(dataFrame.fillna(0.0))
        labelDataFrames.append(
            pd.DataFrame({'label': label_encoding(motionName)}, index=[0])
        )

    return featureDataFrames, labelDataFrames


def main():
    """Load the recordings, split them into train/test sets and print them.

    Returns:
        The ``(xTrain, xTest, yTrain, yTest)`` lists produced by the split.

    Raises:
        ValueError: if no recording file was found in ``recordedDataPath``.
    """
    # Imported here so the helpers above can be imported (and tested) on
    # machines where scikit-learn is not installed.
    from sklearn.model_selection import train_test_split

    featureDataFrames, labelDataFrames = load_recordings(recordedDataPath)
    if not featureDataFrames:
        raise ValueError(
            f"No recordings named '<motion name>_<number>.csv' found in "
            f"{recordedDataPath!r}"
        )

    # Splitting the feature and label data into test and train cases
    xTrain, xTest, yTrain, yTest = train_test_split(
        featureDataFrames, labelDataFrames, test_size=0.2, random_state=29
    )

    print(len(xTrain))
    print(len(xTest))
    print(len(yTrain))
    print(len(yTest))
    print(yTrain)
    print(xTrain)

    return xTrain, xTest, yTrain, yTest


if __name__ == "__main__":
    xTrain, xTest, yTrain, yTest = main()
Editor is loading...
Leave a Comment