Generative AI
An Introduction to Coding Portfolio Development with skfolio: Building, Testing, Tuning, and Comparing Modern Investment Strategies

# --- Factor-model prior ------------------------------------------------------
# Load the factor dataset and turn both the asset prices and the factor prices
# into return frames with a single call.
factor_prices = load_factors_dataset()
X_full, F_full = prices_to_returns(prices, factor_prices)

# shuffle=False keeps the rows in their original order, so the last 33% of
# the sample is held out instead of a random subset.
X_tr, X_te, F_tr, F_te = train_test_split(
    X_full, F_full, test_size=0.33, shuffle=False
)

# Maximize-ratio optimizer on variance, with FactorModel as prior estimator.
fm = MeanRisk(
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    risk_measure=RiskMeasure.VARIANCE,
    prior_estimator=FactorModel(),
)
fm.fit(X_tr, F_tr)  # the factor frame is passed as the second fit argument

ptf_fm = fm.predict(X_te)
ptf_fm.name = "Factor Model"
# Leading newline separates this line from whatever was printed before it.
print(f"\nFactor-model Sharpe: {ptf_fm.annualized_sharpe_ratio:.3f}")
# --- Pre-selection + optimization pipeline -----------------------------------
# Step 1: SelectKExtremes configured with k=8, highest=True.
_preselect_step = SelectKExtremes(k=8, highest=True)
# Step 2: maximize-ratio optimizer on variance, applied to whatever step 1
# passes through.
_optimize_step = MeanRisk(
    risk_measure=RiskMeasure.VARIANCE,
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
)
pipe = Pipeline(
    [
        ("preselect", _preselect_step),
        ("optimize", _optimize_step),
    ]
)
pipe.fit(X_train)

# Predict on the held-out set and label the resulting portfolio.
ptf_pipe = pipe.predict(X_test)
ptf_pipe.name = "Top-8 + Max Sharpe"
# --- Walk-forward cross-validated prediction ---------------------------------
wf_model = MeanRisk(
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    risk_measure=RiskMeasure.VARIANCE,
)

# Each split trains on 252*2 observations and tests on the following 63.
# NOTE(review): presumably ~2 years / ~1 quarter of daily data — confirm the
# sampling frequency of X.
mp_portfolio = cross_val_predict(
    wf_model,
    X,
    cv=WalkForward(train_size=252 * 2, test_size=63),
    n_jobs=-1,
)
mp_portfolio.name = "Walk-Forward Max Sharpe"

# Leading newline separates this line from whatever was printed before it.
print(
    f"\nWalk-forward portfolio Sharpe={mp_portfolio.annualized_sharpe_ratio:.3f} "
    f"CalmarRatio={mp_portfolio.calmar_ratio:.3f}"
)
mp_portfolio.plot_cumulative_returns().show()
# --- Hyper-parameter tuning with walk-forward CV ------------------------------
# Base estimator; the alpha given here is only a starting value because the
# grid below overrides prior_estimator__mu_estimator__alpha for every candidate.
tuned = MeanRisk(
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    risk_measure=RiskMeasure.VARIANCE,
    prior_estimator=EmpiricalPrior(mu_estimator=EWMu(alpha=0.1)),
)

# 3 x 4 = 12 candidates, each evaluated on the same walk-forward splits.
grid = GridSearchCV(
    estimator=tuned,
    cv=WalkForward(train_size=252 * 2, test_size=63),
    n_jobs=-1,
    param_grid={
        "l2_coef": [0.0, 0.01, 0.1],
        "prior_estimator__mu_estimator__alpha": [0.05, 0.1, 0.2, 0.5],
    },
)
grid.fit(X_train)

# Leading newline separates this report from whatever was printed before it.
print("\nBest params:", grid.best_params_)
# No `scoring` is passed, so best_score_ comes from the estimator's own score
# method. NOTE(review): the "(Sharpe)" label assumes that default — confirm.
print(f"Best CV score (Sharpe): {grid.best_score_:.3f}")

# Evaluate the refitted best candidate on the held-out set.
ptf_tuned = grid.best_estimator_.predict(X_test)
ptf_tuned.name = "Tuned Max Sharpe"
# --- Collect every portfolio into one Population ------------------------------
# Order is preserved: the baseline portfolios first, then each strategy's
# portfolio in the sequence listed here.
_final_members = [
    *baseline_population,
    ptf_min_var,
    ptf_max_sharpe,
    ptf_rb_var,
    ptf_rb_cvar,
    ptf_hrp,
    ptf_nco,
    ptf_robust,
    ptf_gerber,
    ptf_constr,
    ptf_bl,
    ptf_fm,
    ptf_pipe,
    ptf_tuned,
]
final = Population(_final_members)
# --- Summary table -------------------------------------------------------------
_full = final.summary()

# Rows we would like to report. Both "Maximum Drawdown" and "Max Drawdown" are
# listed and whichever labels exist in the summary index are kept.
_wanted_final = [
    "Annualized Mean", "Annualized Standard Deviation",
    "Annualized Sharpe Ratio", "Annualized Sortino Ratio",
    "CVaR at 95%", "Maximum Drawdown", "Max Drawdown",
]
_have_final = [r for r in _wanted_final if r in _full.index]

# Transpose so each portfolio is a row and each measure a column.
summary = _full.loc[_have_final].T

# The rows above are filtered defensively, so guard the sort the same way:
# an unconditional sort_values would raise KeyError if the Sharpe row is absent.
# NOTE(review): if summary() returns pre-formatted strings, this ordering is
# lexicographic rather than numeric — confirm.
_sort_key = "Annualized Sharpe Ratio"
if _sort_key in summary.columns:
    summary = summary.sort_values(_sort_key, ascending=False)

# Leading newline separates the banner from whatever was printed before it.
print("\n" + "=" * 80)
print("FINAL HORSE RACE — sorted by Sharpe (out-of-sample test set)")
print("=" * 80)
print(summary.to_string())
# --- Plots and closing message --------------------------------------------------
# Population-level charts: cumulative returns and composition.
final.plot_cumulative_returns().show()
final.plot_composition().show()

# Contribution chart for the variance risk-budgeting portfolio, using
# variance as the measure.
ptf_rb_var.plot_contribution(measure=RiskMeasure.VARIANCE).show()

# Leading newline separates the closing message from the preceding output.
print("\nDone. Try swapping risk measures, adding constraints, or wiring in")
print("your own returns DataFrame — every estimator follows the sklearn API.")



