# Bounds of the candidate period currently being tracked (None = no open
# candidate).
start_time = None
end_time = None

# Per the original author's note, dfB is expected to contain only rows for
# cement type `cement_type_cpiv_bool_pv` with the mill in normal operating
# conditions; that filtering happens before this point.
#
# A row is "valid" when both separator signals are present. A similar check on
# `mill_dp_mbar_pv` is intentionally not applied (it was disabled upstream).
has_power = dfB['separator_pwr_kw_pv'].notna()
has_speed = dfB['separator_speed_rpm_pv'].notna()
valid_indices = dfB[has_power & has_speed].index

# Walk the rows in order and collect (start, end) pairs spanning exactly one
# hour of uninterrupted valid rows.
valid_periods = []
for index, row in dfB.iterrows():
    # Open a new candidate period on the first row after a reset.
    if start_time is None:
        start_time = index
        flag = True

    # An invalid row closes the current candidate; the next row starts over.
    # NOTE(review): `flag` is re-armed together with `start_time`, so it is
    # always True when tested here; kept so module state is unchanged.
    if index not in valid_indices or not flag:
        flag = False
        start_time = None
        continue

    # Assumes the index holds timestamps so the difference is a timedelta.
    duration = index - start_time
    # NOTE(review): the period is recorded only when the elapsed time is
    # exactly 3600 s, and `start_time` is not reset afterwards, so each
    # uninterrupted valid run yields at most one period - confirm intent.
    if duration.total_seconds() == 60 * 60:
        end_time = index
        valid_periods.append((start_time, end_time))
def check_cross_corr(column, df_temp, start_time, end_time, m_type, max_lag=30):
    """Locate the extreme cross-correlation between `column` and separator power.

    Both signals are restricted to ``df_temp[start_time:end_time]``,
    standardised (zero mean, unit population standard deviation) and
    cross-correlated with ``np.correlate(..., mode='full')``. The search for the
    extreme value is limited to lags in ``[-max_lag, max_lag]``, centred on
    zero lag.

    Lag convention (that of ``np.correlate``): the value at lag ``k`` is
    ``sum_n column[n + k] * power[n]``.

    Args:
        column: Name of the column correlated against ``'separator_pwr_kw_pv'``.
        df_temp: DataFrame containing ``column`` and ``'separator_pwr_kw_pv'``.
        start_time: Start of the row slice applied to ``df_temp``.
        end_time: End of the row slice applied to ``df_temp``.
        m_type: ``'MAX'`` to return the maximum correlation; any other value
            returns the minimum.
        max_lag: Largest absolute lag (in samples) to consider. It is clamped
            to ``n_samples - 1``, the largest lag that exists for the window.

    Returns:
        Tuple ``(start_time, end_time, extreme_value, lag)``. The correlation
        value is the raw sum of products (not divided by the sample count).
        It is NaN if either signal is constant within the window, because the
        standardisation then divides by a zero standard deviation.

    Raises:
        ValueError: If the selected window is empty or ``max_lag`` is negative.
    """
    if max_lag < 0:
        raise ValueError("max_lag must be non-negative")

    window = df_temp[start_time:end_time]
    speed = window[column].to_numpy(dtype=float)
    power = window['separator_pwr_kw_pv'].to_numpy(dtype=float)

    n_samples = len(speed)
    if n_samples == 0:
        raise ValueError("no samples between start_time and end_time")

    speed_normalized = (speed - np.mean(speed)) / np.std(speed)
    power_normalized = (power - np.mean(power)) / np.std(power)
    cross_corr = np.correlate(speed_normalized, power_normalized, mode='full')

    # In 'full' mode the output has 2 * n_samples - 1 entries and zero lag sits
    # at index n_samples - 1, so the lag window must be centred there rather
    # than taken from the start of the array.
    zero_lag_index = n_samples - 1
    effective_lag = min(max_lag, zero_lag_index)
    lags = np.arange(-effective_lag, effective_lag + 1)
    windowed_corr = cross_corr[zero_lag_index - effective_lag:
                               zero_lag_index + effective_lag + 1]

    find_extreme = np.argmax if m_type == 'MAX' else np.argmin
    extreme_index = find_extreme(windowed_corr)
    return start_time, end_time, windowed_corr[extreme_index], lags[extreme_index]
# For every valid period, find the lag (within +/-60 samples) at which separator
# speed and separator power are most strongly correlated.
lags = []
for period_start, period_end in valid_periods:
    result_tuple = check_cross_corr(
        'separator_speed_rpm_pv',
        dfB,
        period_start,
        period_end,
        m_type='MAX',
        max_lag=60,
    )
    # Element 3 of the result is the lag of the extreme correlation.
    lags.append(result_tuple[3])

# Distribution of the best lags across all periods.
plt.hist(lags, bins=50)
plt.show()

# Most frequent lag. max() raises ValueError on an empty sequence, so fall back
# to None when no valid period was found instead of crashing.
if lags:
    most_common_value = max(set(lags), key=lags.count)
else:
    most_common_value = None
print(most_common_value)