Live Betting Strategy — Fonbet Odds vs Model Edge

Flat-stake and fractional-Kelly simulation on batch_inference predictions

Author

Dima Ivanov

Published

May 29, 2026

Show code

import sys
from pathlib import Path

project_root = Path().resolve().parent.parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import warnings
warnings.filterwarnings("ignore")

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
import numpy as np
from IPython.display import display, HTML, Markdown

# Object-store key prefix for the live-betting CSV outputs, plus a path under
# the project root (LIVE_DIR is not referenced elsewhere in the visible code).
_MINIO_PREFIX = "analysis/live_betting"
LIVE_DIR = project_root / "data" / "analysis" / "live_betting"

# MinIO access is optional.  Start from "not configured" and only overwrite
# both names together once the settings import, the settings call and the
# bucket lookup have all succeeded.
_minio_settings = None
_minio_bucket = None
try:
    from src.app.config.storage import get_minio_settings as _get_minio_settings
    _loaded_settings = _get_minio_settings()
    _minio_settings, _minio_bucket = (
        _loaded_settings,
        getattr(_loaded_settings, "bucket_predictions", None),
    )
except Exception:
    # Best-effort: any failure leaves both names as None.
    pass


# Names of the CSV files that could not be loaded, in request order.
_missing: list[str] = []


def _try_read(fname: str) -> pd.DataFrame | None:
    """Load *fname* from the MinIO analysis prefix, tolerating its absence.

    Args:
        fname: File name relative to ``_MINIO_PREFIX`` inside the bucket.

    Returns:
        The parsed CSV as a DataFrame, or ``None`` when no bucket is
        configured or the read fails for any reason.  In both of those
        cases *fname* is appended to the module-level ``_missing`` list.
    """
    frame = None
    if _minio_bucket:
        location = f"s3://{_minio_bucket}/{_MINIO_PREFIX}/{fname}"
        try:
            frame = pd.read_csv(
                location, storage_options=_minio_settings.storage_options
            )
        except Exception:
            # Deliberately broad: a missing or unreadable file is reported
            # to the reader via `_missing`, not raised.
            frame = None
    if frame is None:
        _missing.append(fname)
    return frame


# Fetch the five pipeline outputs in a fixed order so that `_missing` lists
# unavailable files in the order they were requested.  Each name is either a
# DataFrame or None.
df_overall, df_threshold, df_segment, df_region, df_ts = [
    _try_read(csv_name)
    for csv_name in (
        "overall_roi.csv",
        "roi_by_threshold.csv",
        "roi_by_segment.csv",
        "roi_by_region.csv",
        "roi_timeseries.csv",
    )
]

# Tell the reader up front which inputs are absent; later cells skip
# themselves when their frame is None.
if _missing:
    _missing_names = ", ".join(_missing)
    display(Markdown(
        f"> **Data not yet available:** {_missing_names}.  \n"
        "> The live-betting pipeline runs automatically via Airflow after each Fonbet odds update."
    ))

0. Coverage — matched finished matches

Matches from batch_inference predictions that have both a known result and Fonbet odds available form the basis of this analysis. Coverage grows over time as more matches finish and the Fonbet odds pipeline accumulates snapshots.

Show code

# Coverage summary: number of matched finished matches and how many of them
# the flat-stake strategy bet on.  Guarded on `.empty` like the other cells so
# an empty overall_roi.csv skips the cell instead of raising IndexError.
if df_overall is not None and not df_overall.empty:
    # n_matches is taken from the first row of the overall table.
    n_matches = int(df_overall.iloc[0]["n_matches"])

    # Look the flat-stake row up once; if it is absent, render "—" rather than
    # failing on `.iloc[0]`.
    flat_rows = df_overall[df_overall["strategy"] == "flat_stake"]
    if flat_rows.empty:
        n_bets_flat = None
        bet_rate = None
        n_bets_text = "—"
        bet_rate_text = "—"
    else:
        n_bets_flat = int(flat_rows["n_bets"].iloc[0])
        bet_rate = float(flat_rows["bet_rate"].iloc[0])
        n_bets_text = f"{n_bets_flat:,}"
        bet_rate_text = f"{bet_rate:.1%}"

    # NOTE(review): the row label hard-codes min_edge=0.02, but the rendered
    # numbers (1,136 bets, 100% bet rate) match the min_edge=0.000 row of the
    # threshold sweep rather than the 0.020 row — confirm which threshold the
    # pipeline uses for the overall flat-stake figures.
    summary_html = f"""
    <table style="border-collapse:collapse;font-size:14px;width:480px">
      <tr style="background:#f5f5f5">
        <th style="text-align:left;padding:8px 14px">Metric</th>
        <th style="text-align:right;padding:8px 14px">Value</th>
      </tr>
      <tr><td style="padding:6px 14px">Matched finished matches</td>
          <td style="text-align:right;padding:6px 14px"><b>{n_matches:,}</b></td></tr>
      <tr><td style="padding:6px 14px">Bets placed (flat-stake, min_edge=0.02)</td>
          <td style="text-align:right;padding:6px 14px">{n_bets_text}</td></tr>
      <tr><td style="padding:6px 14px">Bet rate</td>
          <td style="text-align:right;padding:6px 14px">{bet_rate_text}</td></tr>
    </table>
    """
    display(HTML(summary_html))

    # Small samples get an explicit warning so readers do not over-interpret.
    if n_matches < 50:
        display(Markdown(
            f"> **Note:** Only {n_matches} matched matches available. "
            "Results are preliminary — re-run after more data accumulates."
        ))

Metric	Value
Matched finished matches	1,136
Bets placed (flat-stake, min_edge=0.02)	1,136
Bet rate	100.0%

1. Overall ROI — flat-stake vs fractional Kelly

Show code

# Per-strategy overview table.  All three rate columns share one NaN policy:
# missing values render as "—" (previously bet_rate rendered NaN as "nan%").
if df_overall is not None:
    cols_show = [
        "strategy", "n_matches", "n_bets", "bet_rate",
        "hit_rate", "roi_pct",
    ]
    # Keep only the columns the pipeline actually produced, in display order.
    df_show = df_overall[[c for c in cols_show if c in df_overall.columns]].copy()

    # Column -> format spec; fractions are shown as percentages, roi_pct is
    # already in percent units and gets an explicit sign.
    display_formats = {
        "bet_rate": "{:.1%}",
        "hit_rate": "{:.1%}",
        "roi_pct": "{:+.1f}%",
    }
    for col, spec in display_formats.items():
        if col in df_show.columns:
            # Bind `spec` as a default to avoid the late-binding closure pitfall.
            df_show[col] = df_show[col].map(
                lambda x, spec=spec: spec.format(x) if pd.notna(x) else "—"
            )

    display(HTML(df_show.to_html(index=False, border=0,
        classes="table table-striped", justify="left")))

strategy	n_matches	n_bets	bet_rate	hit_rate	roi_pct
flat_stake	1136	1136	100.0%	48.8%	-7.7%
kelly_0.25	1136	988	87.0%	31.5%	-16.3%

Show code

# Kelly-specific metrics: final bankroll and bankroll growth.
# The sentence is only rendered when every column it interpolates is present;
# checking `final_bankroll` alone would still raise KeyError on the other two.
if df_overall is not None:
    # na=False keeps rows with a missing strategy name out of the mask instead
    # of producing NaN entries that boolean indexing rejects.
    kelly_row = df_overall[df_overall["strategy"].str.startswith("kelly", na=False)]
    kelly_cols = {"final_bankroll", "bankroll_growth_pct", "total_staked"}
    if not kelly_row.empty and kelly_cols.issubset(kelly_row.columns):
        # Only the first Kelly row is summarised.
        k = kelly_row.iloc[0]
        display(Markdown(
            f"**Kelly summary:** "
            f"initial bankroll 100 → final **{k['final_bankroll']:.2f}** "
            f"({k['bankroll_growth_pct']:+.1f}% growth). "
            f"Total staked: {k['total_staked']:.2f} units."
        ))

Kelly summary: initial bankroll 100 → final 13.28 (-86.7% growth). Total staked: 533.24 units.

2. Edge threshold sweep

ROI as a function of the minimum required model edge before a bet is placed. Higher thresholds select fewer but more confident bets.

Show code

# Two side-by-side bar charts over the swept thresholds (ROI on the left, bet
# count on the right), followed by the raw sweep table.
if df_threshold is not None and not df_threshold.empty:
    # Thresholds are plotted as string categories so the bars are evenly spaced.
    edge_labels = df_threshold["min_edge"].astype(str)
    roi_by_edge = df_threshold["roi_pct"]
    # Negative ROI in red, everything else in green.
    roi_colors = ["#c0392b" if roi < 0 else "#27ae60" for roi in roi_by_edge]

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    roi_ax, bets_ax = axes

    roi_ax.bar(edge_labels, roi_by_edge, color=roi_colors)
    # Break-even reference line.
    roi_ax.axhline(0, color="black", linewidth=0.8, linestyle="--")

    bets_ax.bar(edge_labels, df_threshold["n_bets"], color="#3498db")

    # Shared axis decoration for both panels.
    panel_text = (
        (roi_ax, "ROI (%)", "ROI by minimum edge threshold"),
        (bets_ax, "Number of bets", "Bets placed by threshold"),
    )
    for panel, y_label, heading in panel_text:
        panel.set_xlabel("Min edge")
        panel.set_ylabel(y_label)
        panel.set_title(heading)
        panel.tick_params(axis="x", rotation=45)

    plt.tight_layout()
    plt.show()

    sweep_table = df_threshold.to_html(
        index=False,
        border=0,
        classes="table table-striped",
        float_format="{:.3f}".format,
    )
    display(HTML(sweep_table))

min_edge	n_matches	n_bets	bet_rate	n_correct_bets	hit_rate	total_staked	gross_return	net_profit	roi_pct
0.000	1136	1136	1.000	554	0.488	1136.000	1047.970	-88.030	-7.749
0.020	1136	1125	0.990	552	0.491	1125.000	1043.200	-81.800	-7.271
0.050	1136	1005	0.885	506	0.503	1005.000	930.880	-74.120	-7.375
0.100	1136	755	0.665	398	0.527	755.000	698.240	-56.760	-7.518
0.150	1136	547	0.482	297	0.543	547.000	485.410	-61.590	-11.260
0.200	1136	362	0.319	208	0.575	362.000	320.120	-41.880	-11.569
0.250	1136	250	0.220	141	0.564	250.000	204.420	-45.580	-18.232
0.300	1136	162	0.143	90	0.556	162.000	122.470	-39.530	-24.401

3. ROI by region

ROI breakdown by region (country), sorted by ROI descending. ★ marks regions where the model has below-median log-loss on historical holdout data — better-calibrated predictions give a more reliable edge signal there. Regions with fewer than min_bets bets are excluded.

⚠ Limited-data caveat: per-region ROI estimates carry wide uncertainty while the total matched-match count is small. Positive-ROI regions are candidates for selective betting — not confirmed edges. Revisit once each region accumulates ≥ 50 settled bets.

Show code

# Region table sorted by ROI (best first).  Regions flagged `low_logloss` by
# the pipeline get a "★ " prefix.
if df_region is not None and not df_region.empty:
    name_col  = "region_name" if "region_name" in df_region.columns else "regionId"
    has_ll    = "logloss" in df_region.columns
    has_lowll = "low_logloss" in df_region.columns

    # Sort ONCE and derive both the displayed rows and the star mask from the
    # same frame, so the two can never get out of step.
    df_sorted = df_region.sort_values("roi_pct", ascending=False).reset_index(drop=True)

    cols = [name_col, "n_bets", "hit_rate", "roi_pct"]
    if has_ll:
        cols.append("logloss")
    cols = [c for c in cols if c in df_sorted.columns]

    df_reg_show = df_sorted[cols].copy()

    if has_lowll and name_col in df_reg_show.columns:
        # Missing flags count as "not starred"; astype(bool) guarantees a real
        # boolean mask even when the CSV column was parsed as object/float.
        star_mask = df_sorted["low_logloss"].fillna(False).astype(bool)
        region_names = df_reg_show[name_col].astype(str)
        df_reg_show[name_col] = region_names.where(~star_mask, "★ " + region_names)

    # Display formatting; missing values render as an em dash.
    if "hit_rate" in df_reg_show.columns:
        df_reg_show["hit_rate"] = df_reg_show["hit_rate"].map(
            lambda x: f"{x:.1%}" if pd.notna(x) else "—"
        )
    if "roi_pct" in df_reg_show.columns:
        df_reg_show["roi_pct"] = df_reg_show["roi_pct"].map(
            lambda x: f"{x:+.1f}%" if pd.notna(x) else "—"
        )
    if has_ll:
        df_reg_show["logloss"] = df_reg_show["logloss"].map(
            lambda x: f"{x:.4f}" if pd.notna(x) else "—"
        )

    display(HTML(df_reg_show.to_html(index=False, border=0,
        classes="table table-striped", justify="left")))

region_name	n_bets	hit_rate	roi_pct	logloss
Japan	10	70.0%	+45.1%	1.0468
Egypt	10	70.0%	+42.2%	1.0181
★ Scotland	12	66.7%	+34.4%	0.9980
USA	56	62.5%	+22.6%	1.0349
Chile	31	54.8%	+22.5%	1.0532
★ Bulgaria	19	52.6%	+21.6%	0.9971
★ Netherlands	29	62.1%	+18.8%	0.9922
Algeria	12	58.3%	+11.5%	1.0038
Portugal	20	60.0%	+9.4%	1.0044
Ecuador	15	53.3%	+8.3%	1.0355
Germany	37	54.1%	+7.4%	1.0204
Georgia	14	50.0%	+7.1%	1.0383
Belarus	11	54.5%	+3.2%	1.0068
★ Norway	61	57.4%	+1.8%	0.9442
England	22	45.5%	-0.2%	1.0267
Indonesia	14	57.1%	-5.1%	1.0191
Israel	23	43.5%	-7.0%	1.0128
Poland	46	45.7%	-8.2%	1.0583
Spain	39	51.3%	-10.9%	1.0098
Colombia	17	52.9%	-11.6%	1.0180
Brazil	35	42.9%	-11.9%	1.0350
South America	28	53.6%	-11.9%	1.0291
Argentina	53	41.5%	-13.9%	1.0506
Iceland	22	45.5%	-18.1%	1.0209
Ireland	13	46.2%	-18.6%	1.0291
Romania	14	35.7%	-19.6%	1.0174
★ Sweden	47	42.6%	-20.2%	0.9963
Italy	30	43.3%	-21.5%	1.0332
Hungary	10	50.0%	-23.0%	1.0374
★ Serbia	10	50.0%	-28.8%	0.9750
France	16	37.5%	-30.1%	1.0106
Iraq	12	33.3%	-32.1%	1.0008
★ Finland	13	38.5%	-34.9%	0.9953
★ South Africa	14	28.6%	-36.2%	0.9921
★ Ukraine	13	46.2%	-36.4%	0.9757
Denmark	19	36.8%	-36.9%	1.0298
South Korea	15	33.3%	-40.1%	1.0658
★ Czech Republic	15	33.3%	-41.2%	0.9853
Belgium	15	26.7%	-43.7%	1.0087
★ Bosnia-Herzegovina	12	33.3%	-47.0%	0.8823
★ Undefined	23	21.7%	-53.6%	0.9881
★ China	27	22.2%	-60.4%	0.9937

Show code

# Horizontal bar chart of ROI per region, best region at the top.
if df_region is not None and not df_region.empty:
    name_col = "region_name" if "region_name" in df_region.columns else "regionId"
    has_lowll = "low_logloss" in df_region.columns

    df_chart = df_region.sort_values("roi_pct", ascending=False).reset_index(drop=True)
    # Truncate long names so the y-axis stays readable.
    labels = df_chart[name_col].astype(str).str[:35]
    if has_lowll:
        # Force a real boolean mask before negating: `~` on an object-dtype
        # column of Python bools yields -1/-2 rather than False/True.
        is_starred = df_chart["low_logloss"].fillna(False).astype(bool)
        labels = labels.where(~is_starred, "★ " + labels)
    roi_vals = df_chart["roi_pct"].values
    colors = ["#27ae60" if v >= 0 else "#c0392b" for v in roi_vals]

    # Median logloss over the regions in THIS chart.  It is shown for context
    # only: the star flag comes from the pipeline's `low_logloss` column, whose
    # cutoff is not computed here and need not equal this value.
    logloss_median = (
        float(df_chart["logloss"].median()) if "logloss" in df_chart.columns else None
    )

    # Height scales with the number of regions; reversed inputs put the
    # highest-ROI region at the top of the horizontal bar chart.
    fig, ax = plt.subplots(figsize=(10, max(5, len(df_chart) * 0.4)))
    ax.barh(labels[::-1], roi_vals[::-1], color=colors[::-1])
    ax.axvline(0, color="black", linewidth=0.8)
    ax.set_xlabel("ROI (%)")
    title = "ROI by region (flat-stake, Fonbet odds)"
    if has_lowll:
        title += "\n★ = below-median logloss on historical holdout data"
    if logloss_median is not None:
        title += f"\n(median logloss of charted regions: {logloss_median:.3f})"
    ax.set_title(title)
    plt.tight_layout()
    plt.show()

4. ROI by tournament

This section drills down from the regional view above. Only tournaments with at least 10 bets are shown. The tables highlight which specific leagues drive a region’s positive or negative ROI, which helps identify the most and least valuable leagues within a promising region.

Show code

# Tournament-level ROI: top/bottom tables plus a bar chart of the extremes.
if df_segment is not None and not df_segment.empty:
    name_col = "tournament_name" if "tournament_name" in df_segment.columns else "tournamentId"
    region_col = "region_name" if "region_name" in df_segment.columns else None

    cols = [name_col]
    if region_col:
        cols.append(region_col)
    cols += ["n_bets", "hit_rate", "roi_pct"]
    cols = [c for c in cols if c in df_segment.columns]

    df_seg_show = (
        df_segment[cols]
        .sort_values("roi_pct", ascending=False)
        .reset_index(drop=True)
    )

    # Top / bottom 15 — only split when the two slices cannot overlap.
    n_show = 15
    if len(df_seg_show) > n_show * 2:
        df_top = df_seg_show.head(n_show)
        df_bot = df_seg_show.tail(n_show)
        display(Markdown(f"**Top {n_show} tournaments by ROI:**"))
        display(HTML(df_top.to_html(index=False, border=0,
            classes="table table-striped", float_format="{:.2f}".format)))
        display(Markdown(f"**Bottom {n_show} tournaments by ROI:**"))
        display(HTML(df_bot.to_html(index=False, border=0,
            classes="table table-striped", float_format="{:.2f}".format)))
    else:
        display(HTML(df_seg_show.to_html(index=False, border=0,
            classes="table table-striped", float_format="{:.2f}".format)))

    # Bar chart of ROI for top/bottom.  With 20 rows or fewer every tournament
    # is charted; head(n // 2) + tail(n // 2) would drop the middle row for odd
    # counts and produce an empty chart for a single row.
    max_bars = 20
    if len(df_seg_show) > max_bars:
        half = max_bars // 2
        df_chart = pd.concat([df_seg_show.head(half), df_seg_show.tail(half)])
    else:
        df_chart = df_seg_show

    labels = df_chart[name_col].astype(str).str[:30]
    if region_col:
        # Several leagues share a name (e.g. "Premier League"); adding the
        # region keeps the tick labels distinguishable.
        labels = labels + " (" + df_chart[region_col].astype(str) + ")"
    colors = ["#27ae60" if v >= 0 else "#c0392b" for v in df_chart["roi_pct"]]

    # Plot against numeric positions instead of string categories: matplotlib
    # maps identical category strings to the same y position, which would
    # stack bars of same-named tournaments on top of each other.
    positions = list(range(len(df_chart)))

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.barh(positions, df_chart["roi_pct"].to_numpy(), color=colors)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels.tolist())
    ax.axvline(0, color="black", linewidth=0.8)
    ax.set_xlabel("ROI (%)")
    ax.set_title("Top / bottom tournaments by ROI (flat-stake)")
    # Best ROI at the top.
    ax.invert_yaxis()
    plt.tight_layout()
    plt.show()

Top 15 tournaments by ROI:

tournament_name	region_name	n_bets	hit_rate	roi_pct
Primera B	Chile	16	0.75	81.88
USL Championship	USA	13	0.69	52.92
J- League	Japan	10	0.70	45.10
Premier League	Egypt	10	0.70	42.20
Tweede Divisie	Netherlands	15	0.67	29.33
LaLiga	Spain	18	0.61	22.06
Eliteserien	Norway	18	0.67	18.39
A PFG	Bulgaria	13	0.46	17.85
NWSL	USA	12	0.58	17.33
I Liga	Poland	15	0.47	15.67
Premier League	Belarus	10	0.60	13.50
Liga Leumit	Israel	12	0.50	13.33
Ligue Professionnelle 1	Algeria	12	0.58	11.50
Copa Libertadores	South America	13	0.69	11.08
Major League Soccer	USA	30	0.60	10.57

Bottom 15 tournaments by ROI:

tournament_name	region_name	n_bets	hit_rate	roi_pct
Copa Sudamericana	South America	15	0.40	-31.87
Super League	Iraq	12	0.33	-32.08
Colombia Cup 1	Colombia	12	0.42	-33.25
Allsvenskan	Sweden	17	0.35	-35.00
Segunda División	Spain	20	0.45	-36.20
Premier Soccer League	South Africa	14	0.29	-36.21
Druha League	Czech Republic	14	0.36	-37.00
K League 2	South Korea	15	0.33	-40.07
Premier League	Ukraine	12	0.42	-40.67
Clausura	Chile	15	0.33	-40.73
Jupiler Pro League	Belgium	15	0.27	-43.67
Premier League	Bosnia-Herzegovina	12	0.33	-47.00
NaN	Undefined	22	0.23	-51.50
Primera B Metropolitana	Argentina	18	0.22	-55.28
Super League	China	12	0.17	-71.00

5. Cumulative P&L curve

Running P&L puts the regional findings in temporal context: it shows whether any positive edge is consistent over time or concentrated in a short window. Each point is one placed bet; the curve extends as new finished matches with Fonbet odds arrive.

Show code

# Cumulative profit and running ROI, one point per placed bet.
# NOTE(review): the chart title says flat stake, but the rendered summary
# reports 988 bets, which equals the Kelly bet count in the overall table
# rather than the flat-stake count — confirm which bet set the pipeline
# writes to roi_timeseries.csv.
if df_ts is not None and not df_ts.empty:
    if "date" in df_ts.columns:
        df_ts["date"] = pd.to_datetime(df_ts["date"], utc=True, errors="coerce")
        # Stable sort: rows sharing a timestamp keep their file order, so the
        # precomputed cumulative_* columns are not shuffled within a date.
        # NOTE(review): this still assumes the pipeline accumulated the
        # cumulative columns in date order — confirm.
        df_ts = df_ts.sort_values("date", kind="mergesort").reset_index(drop=True)

    fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

    # 1-based bet counter on the x-axis.
    x = df_ts.index + 1
    cum_profit = df_ts["cumulative_profit"]

    axes[0].plot(x, cum_profit, color="#2c3e50", linewidth=1.5)
    axes[0].axhline(0, color="gray", linewidth=0.8, linestyle="--")
    # Shade profit regions green and loss regions red.
    axes[0].fill_between(x, cum_profit, 0,
        where=cum_profit >= 0, alpha=0.25, color="#27ae60")
    axes[0].fill_between(x, cum_profit, 0,
        where=cum_profit < 0, alpha=0.25, color="#c0392b")
    axes[0].set_ylabel("Cumulative profit (units)")
    axes[0].set_title("Cumulative P&L — flat stake (1 unit per bet)")

    axes[1].plot(x, df_ts["cumulative_roi_pct"], color="#8e44ad", linewidth=1.5)
    axes[1].axhline(0, color="gray", linewidth=0.8, linestyle="--")
    axes[1].set_xlabel("Bet number")
    axes[1].set_ylabel("Cumulative ROI (%)")
    axes[1].set_title("Running ROI %")
    axes[1].yaxis.set_major_formatter(mticker.FormatStrFormatter("%.1f%%"))

    plt.tight_layout()
    plt.show()

    # Summary stats
    final_roi = float(df_ts["cumulative_roi_pct"].iloc[-1])
    # Drawdown is measured against the running peak of the profit curve.  The
    # curve starts at 0 before the first bet, so the peak is floored at 0;
    # otherwise an opening loss would be excluded from the drawdown.
    running_peak = cum_profit.cummax().clip(lower=0)
    max_drawdown = float((cum_profit - running_peak).min())
    display(Markdown(
        f"**Final ROI:** {final_roi:+.2f}% over {len(df_ts)} bets. "
        f"**Max drawdown:** {max_drawdown:.2f} units."
    ))

Final ROI: -9.20% over 988 bets. Max drawdown: -117.07 units.

Summary

Show code

# Closing bullet-point summary assembled from the overall and regional tables.
if df_overall is not None:
    flat_rows = df_overall[df_overall["strategy"] == "flat_stake"]
    # na=False: rows with a missing strategy name are simply not Kelly rows.
    kelly_rows = df_overall[df_overall["strategy"].str.startswith("kelly", na=False)]

    # The summary is built around the flat-stake row; without it there is
    # nothing meaningful to report, so the cell is skipped instead of raising.
    if not flat_rows.empty:
        flat = flat_rows.iloc[0]

        # None marks "omit this bullet".  An empty string would survive the
        # final filter and insert a blank line that splits the markdown list.
        lines = [
            f"- **Matched finished matches:** {int(flat['n_matches']):,}",
            f"- **Bets placed (flat):** {int(flat['n_bets']):,} ({float(flat['bet_rate']):.1%} of matches)",
            f"- **Flat-stake ROI:** {float(flat['roi_pct']):+.1f}%",
            f"- **Hit rate:** {float(flat['hit_rate']):.1%}" if pd.notna(flat.get("hit_rate")) else None,
        ]
        if not kelly_rows.empty and "bankroll_growth_pct" in kelly_rows.columns:
            # Only the first Kelly row is summarised.
            k = kelly_rows.iloc[0]
            lines.append(f"- **Kelly (f=0.25) bankroll growth:** {float(k['bankroll_growth_pct']):+.1f}%")

        # Regional edge summary
        if df_region is not None and not df_region.empty:
            name_col = "region_name" if "region_name" in df_region.columns else "regionId"
            pos_regions = df_region[df_region["roi_pct"] > 0].sort_values("roi_pct", ascending=False)
            if not pos_regions.empty:
                max_named = 5
                top_names = ", ".join(pos_regions[name_col].astype(str).head(max_named).tolist())
                # Make it explicit when the name list is shorter than the count.
                if len(pos_regions) > max_named:
                    count_label = f"{len(pos_regions)}, top {max_named} shown"
                else:
                    count_label = f"{len(pos_regions)}"
                caveat = (
                    " — limited data, monitor as more matches settle."
                    if int(flat["n_matches"]) < 200
                    else "."
                )
                lines.append(
                    f"- **Regions with positive ROI ({count_label}):** {top_names}{caveat}"
                )

        # Blank line so the blockquote is not parsed as part of the list.
        lines.append("")
        lines.append(
            "> This report accumulates results over time. Re-run `make live-betting` "
            "and re-render when new match data arrives."
        )
        display(Markdown("\n".join(line for line in lines if line is not None)))

Matched finished matches: 1,136
Bets placed (flat): 1,136 (100.0% of matches)
Flat-stake ROI: -7.7%
Hit rate: 48.8%
Kelly (f=0.25) bankroll growth: -86.7%
Regions with positive ROI (14): Japan, Egypt, Scotland, USA, Chile.

This report accumulates results over time. Re-run make live-betting and re-render when new match data arrives.