Untitled

 avatar
unknown
plain_text
17 days ago
2.5 kB
2
Indexable
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from scipy.stats import zscore

def load_data(file_path):
    """Read historical price data from an Excel workbook.

    The first column of the sheet becomes the index, and pandas is asked
    to parse that index as dates.

    Args:
        file_path: Location of the Excel file, passed straight to
            ``pandas.read_excel``.

    Returns:
        The DataFrame produced by ``pandas.read_excel``.
    """
    return pd.read_excel(file_path, parse_dates=True, index_col=0)

def calculate_returns(price_df):
    """Compute log returns from a table of prices.

    Every value is ``log(price_t / price_{t-1})`` taken row over row.
    Rows containing any missing value are removed, which includes the
    first row because it has no predecessor to divide by.

    Args:
        price_df: Price table; consecutive rows are consecutive
            observations.

    Returns:
        The log-return table with incomplete rows dropped.
    """
    previous_prices = price_df.shift(1)
    price_ratio = price_df / previous_prices
    log_returns = np.log(price_ratio)
    return log_returns.dropna()

def calculate_scores(signal_df, n_components=3):
    """Calculate asset scores using PCA and correlation-adjusted weighting.

    Every column of ``signal_df`` is z-scored first. Two composite scores
    are then computed for each row:

    * PCA score: the row's principal-component projections, combined with
      each component's explained-variance ratio as its weight.
    * Correlation-adjusted score: a weighted sum of the z-scored columns.
      Weights start equal, each one is scaled by
      ``1 - sum(corr[j, k] * weight[k] for k != j)``, and the result is
      renormalised to sum to 1, so columns that are highly correlated
      with the others contribute less.

    Args:
        signal_df: DataFrame with one column per signal and one row per
            scored item (presumably one row per asset; confirm against
            the caller).
        n_components: Forwarded unchanged to ``sklearn``'s ``PCA``.

    Returns:
        A tuple ``(pca_weighted_scores, correlation_adjusted_scores)``.
        The first is a 1-D NumPy array in row order; the second is the
        result of ``DataFrame.dot`` and therefore carries ``signal_df``'s
        index.
    """
    df_z = signal_df.apply(zscore)

    # PCA-based scoring.
    pca = PCA(n_components=n_components)
    pca_scores = pca.fit_transform(df_z)
    # explained_variance_ratio_ already holds exactly one entry per fitted
    # component, so it is used as-is. Slicing it with n_components was
    # redundant and raised TypeError for non-integer settings that PCA
    # itself accepts (e.g. 0.95 or 'mle').
    pca_weighted_scores = np.dot(pca_scores, pca.explained_variance_ratio_)

    # Correlation-adjusted scoring.
    n_signals = signal_df.shape[1]
    weights = np.ones(n_signals) / n_signals

    # Zeroing the diagonal removes the k == j term, so a single matrix
    # product yields every column's penalty at once.
    off_diagonal_corr = df_z.corr().to_numpy(copy=True)
    np.fill_diagonal(off_diagonal_corr, 0.0)
    correlation_penalty = off_diagonal_corr @ weights

    adjusted_weights = weights * (1.0 - correlation_penalty)
    adjusted_weights /= adjusted_weights.sum()
    correlation_adjusted_scores = df_z.dot(adjusted_weights)

    return pca_weighted_scores, correlation_adjusted_scores

def backtest_strategy(price_df, signal_df, start_date, rebalance_freq, top_n=5):
    """Backtest the strategy from start_date onwards with a given rebalance frequency.

    On every ``rebalance_freq``-th row of the return series (starting at
    ``start_date``) the assets are scored with ``calculate_scores``, the
    ``top_n`` highest correlation-adjusted scores are bought in equal
    weights, and that basket is held until the next rebalance row.

    Args:
        price_df: Historical prices, one column per asset.
        signal_df: Signals looked up with ``signal_df.loc[date]``. The
            lookup result is handed to ``calculate_scores``, so it
            presumably has to be a 2-D table of assets x signals (e.g. a
            (date, asset) MultiIndex) -- confirm against the data source.
        start_date: First date (inclusive) of the backtest.
        rebalance_freq: Number of return rows between rebalances; must be
            a positive integer.
        top_n: Number of top-scoring assets to hold. Defaults to 5.

    Returns:
        A list with the portfolio value (starting from 1) at the end of
        each holding period. Rebalance dates with no entry in
        ``signal_df`` are skipped entirely and add nothing to the list.

    Raises:
        ValueError: If ``rebalance_freq`` or ``top_n`` is smaller than 1.
    """
    if rebalance_freq < 1:
        raise ValueError("rebalance_freq must be a positive integer")
    if top_n < 1:
        raise ValueError("top_n must be a positive integer")

    returns_df = calculate_returns(price_df)
    backtest_returns = returns_df.loc[start_date:]
    portfolio_value = 1.0  # Initial AUM
    portfolio_values = []

    for start in range(0, len(backtest_returns), rebalance_freq):
        date = backtest_returns.index[start]
        if date not in signal_df.index:
            continue

        _, correlation_adjusted_scores = calculate_scores(signal_df.loc[date])
        top_assets = correlation_adjusted_scores.nlargest(top_n).index

        # Float dtype up front: writing fractional weights into an integer
        # Series is an incompatible-dtype assignment in pandas.
        weights = pd.Series(0.0, index=returns_df.columns)
        if len(top_assets) > 0:
            # Divide by the number actually selected so the weights sum to
            # 1 even when fewer than top_n assets are available.
            weights.loc[top_assets] = 1.0 / len(top_assets)

        # Hold the basket for the whole period up to the next rebalance,
        # not just the rebalance day itself.
        # NOTE(review): the window starts on the signal date, as the
        # original code did -- confirm the signals are known before that
        # day's return is realised, otherwise this is look-ahead.
        holding_window = backtest_returns.iloc[start:start + rebalance_freq]

        # Log returns add up over time but not across assets, so sum them
        # per asset first and convert to simple returns before weighting.
        asset_period_returns = np.expm1(holding_window.sum())
        period_return = (asset_period_returns * weights).sum()

        portfolio_value *= 1 + period_return
        portfolio_values.append(portfolio_value)

    return portfolio_values

Editor is loading...
Leave a Comment