SoccerPredictAI
  • Home
  • Reports
    • 01 · EDA & Preprocessing
    • 02 · Feature Engineering
    • 03 · Experiment Studies v1.01–v1.05
    • 04 · Model Analysis
    • 05 · Holdout Analysis
    • 06 · Live Inference & Odds
    • 07 · Live Betting Strategy
  • Back to Docs (MkDocs)

On this page

  • 0. Coverage — matched finished matches
  • 1. Overall ROI — flat-stake vs fractional Kelly
  • 2. Edge threshold sweep
  • 3. ROI by region
  • 4. ROI by tournament
  • 5. Cumulative P&L curve
  • Summary

Live Betting Strategy — Fonbet Odds vs Model Edge

Flat-stake and fractional-Kelly simulation on batch_inference predictions

Author

Dima Ivanov

Published

May 28, 2026

Show code
import sys
from pathlib import Path

# Put the directory two levels above the current working directory on sys.path
# so that the `src.` package import further down can resolve.
# NOTE(review): assumes the report is rendered from a folder two levels below
# the repository root — confirm against the render working directory.
project_root = Path().resolve().parent.parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import warnings
# Suppresses every warning category for the rest of the session, including
# pandas/matplotlib deprecation warnings raised by the cells below.
warnings.filterwarnings("ignore")

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
import numpy as np
from IPython.display import display, HTML, Markdown

# Local artefact directory; not referenced by the cells visible in this report.
LIVE_DIR = project_root / "data" / "analysis" / "live_betting"
# Key prefix inside the bucket under which the live-betting CSVs are looked up.
_MINIO_PREFIX = "analysis/live_betting"

# Best-effort storage configuration: any failure (import error, settings error)
# leaves both names as None, and a falsy bucket makes `_try_read` report every
# file as missing instead of raising.
try:
    from src.app.config.storage import get_minio_settings as _get_minio_settings
    _minio_settings = _get_minio_settings()
    # A settings object without `bucket_predictions` yields None here.
    _minio_bucket   = getattr(_minio_settings, "bucket_predictions", None)
except Exception:
    _minio_settings = None
    _minio_bucket   = None


# Names of the CSV files that could not be loaded; filled in by `_try_read`.
_missing: list[str] = []


def _try_read(fname: str) -> pd.DataFrame | None:
    """Load *fname* from the MinIO prefix as a DataFrame.

    Returns None, and records *fname* in ``_missing``, when no bucket is
    configured or when the read fails for any reason.
    """
    frame: pd.DataFrame | None = None
    if _minio_bucket:
        s3_url = f"s3://{_minio_bucket}/{_MINIO_PREFIX}/{fname}"
        try:
            frame = pd.read_csv(s3_url, storage_options=_minio_settings.storage_options)
        except Exception:
            # Deliberately best-effort: the report renders with whatever is available.
            frame = None
    if frame is None:
        _missing.append(fname)
    return frame


# Load the five live-betting artefacts in a fixed order; each name is bound to
# None when its CSV could not be read.
df_overall, df_threshold, df_segment, df_region, df_ts = (
    _try_read(csv_name)
    for csv_name in (
        "overall_roi.csv",
        "roi_by_threshold.csv",
        "roi_by_segment.csv",
        "roi_by_region.csv",
        "roi_timeseries.csv",
    )
)

# Tell the reader which inputs were unavailable rather than failing the render.
if len(_missing) > 0:
    display(Markdown(
        f"> **Data not yet available:** {', '.join(_missing)}.  \n"
        "> The live-betting pipeline runs automatically via Airflow after each Fonbet odds update."
    ))

0. Coverage — matched finished matches

Matches from batch_inference predictions that have both a known result and Fonbet odds available form the basis of this analysis. Coverage grows over time as more matches finish and the Fonbet odds pipeline accumulates snapshots.

Show code
# Coverage table: how many finished matches were matched with odds and how many
# of them received a flat-stake bet.
if df_overall is not None:
    # Filter once and read every figure from the same flat-stake row, instead of
    # mixing row 0 (whatever strategy it holds) with the flat-stake row.
    flat_rows = df_overall[df_overall["strategy"] == "flat_stake"]

    if flat_rows.empty:
        # Without this guard `.iloc[0]` raises IndexError and aborts the render.
        display(Markdown(
            "> **Note:** `overall_roi.csv` contains no `flat_stake` row — "
            "coverage table skipped."
        ))
    else:
        flat_row = flat_rows.iloc[0]
        n_matches = int(flat_row["n_matches"])
        n_bets_flat = int(flat_row["n_bets"])
        bet_rate = float(flat_row["bet_rate"])

        # The bets label no longer hard-codes "min_edge=0.02": the rendered
        # figures (all matches bet, 100% bet rate) equal the min_edge=0.000 row
        # of the threshold sweep, not the 0.020 row, so the fixed text was stale.
        summary_html = f"""
    <table style="border-collapse:collapse;font-size:14px;width:480px">
      <tr style="background:#f5f5f5">
        <th style="text-align:left;padding:8px 14px">Metric</th>
        <th style="text-align:right;padding:8px 14px">Value</th>
      </tr>
      <tr><td style="padding:6px 14px">Matched finished matches</td>
          <td style="text-align:right;padding:6px 14px"><b>{n_matches:,}</b></td></tr>
      <tr><td style="padding:6px 14px">Bets placed (flat-stake)</td>
          <td style="text-align:right;padding:6px 14px">{n_bets_flat:,}</td></tr>
      <tr><td style="padding:6px 14px">Bet rate</td>
          <td style="text-align:right;padding:6px 14px">{bet_rate:.1%}</td></tr>
    </table>
    """
        display(HTML(summary_html))

        # Small samples make every ROI figure below unreliable; say so up front.
        if n_matches < 50:
            display(Markdown(
                f"> **Note:** Only {n_matches} matched matches available. "
                "Results are preliminary — re-run after more data accumulates."
            ))
Metric Value
Matched finished matches 1,136
Bets placed (flat-stake, min_edge=0.02) 1,136
Bet rate 100.0%

1. Overall ROI — flat-stake vs fractional Kelly

Show code
# Strategy comparison table: keep the columns that exist and render the
# rate/ROI columns as human-readable percentages.
if df_overall is not None:
    wanted = (
        "strategy", "n_matches", "n_bets", "bet_rate",
        "hit_rate", "roi_pct",
    )
    present = [name for name in wanted if name in df_overall.columns]
    df_show = df_overall.loc[:, present].copy()

    # One formatter per column; hit rate and ROI show a dash for missing values.
    formatters = {
        "bet_rate": "{:.1%}".format,
        "hit_rate": lambda x: f"{x:.1%}" if pd.notna(x) else "—",
        "roi_pct": lambda x: f"{x:+.1f}%" if pd.notna(x) else "—",
    }
    for column, render in formatters.items():
        if column in df_show.columns:
            df_show[column] = df_show[column].map(render)

    table_html = df_show.to_html(
        index=False, border=0, classes="table table-striped", justify="left"
    )
    display(HTML(table_html))
strategy n_matches n_bets bet_rate hit_rate roi_pct
flat_stake 1136 1136 100.0% 48.8% -7.7%
kelly_0.25 1136 988 87.0% 31.5% -16.3%
Show code
# Kelly-specific metrics: final bankroll and bankroll growth
if df_overall is not None:
    # na=False keeps the mask boolean even if a strategy value is missing.
    kelly_row = df_overall[df_overall["strategy"].str.startswith("kelly", na=False)]
    # Every column interpolated below must exist; checking only `final_bankroll`
    # would let a partially populated CSV fail with KeyError.
    required_cols = ("final_bankroll", "bankroll_growth_pct", "total_staked")
    if not kelly_row.empty and all(c in kelly_row.columns for c in required_cols):
        # Only the first Kelly variant is summarised.
        k = kelly_row.iloc[0]
        display(Markdown(
            f"**Kelly summary:** "
            f"initial bankroll 100 → final **{k['final_bankroll']:.2f}** "
            f"({k['bankroll_growth_pct']:+.1f}% growth). "
            f"Total staked: {k['total_staked']:.2f} units."
        ))

Kelly summary: initial bankroll 100 → final 13.28 (-86.7% growth). Total staked: 533.24 units.


2. Edge threshold sweep

ROI as a function of the minimum required model edge before a bet is placed. Higher thresholds select fewer but more confident bets.

Show code
# Edge-threshold sweep: ROI and bet volume per minimum-edge setting, shown as
# two side-by-side bar charts followed by the raw table.
if df_threshold is not None and not df_threshold.empty:
    edge_labels = df_threshold["min_edge"].astype(str)
    roi_series = df_threshold["roi_pct"]
    # Red for losing thresholds, green otherwise.
    roi_colors = ["#c0392b" if roi < 0 else "#27ae60" for roi in roi_series]

    fig, (ax_roi, ax_count) = plt.subplots(1, 2, figsize=(12, 4))

    ax_roi.bar(edge_labels, roi_series, color=roi_colors)
    ax_roi.axhline(0, color="black", linewidth=0.8, linestyle="--")
    ax_count.bar(edge_labels, df_threshold["n_bets"], color="#3498db")

    # Both panels share the x-axis label and tick rotation; only the y-label
    # and title differ.
    panel_text = (
        (ax_roi, "ROI (%)", "ROI by minimum edge threshold"),
        (ax_count, "Number of bets", "Bets placed by threshold"),
    )
    for panel, y_label, heading in panel_text:
        panel.set_xlabel("Min edge")
        panel.set_ylabel(y_label)
        panel.set_title(heading)
        panel.tick_params(axis="x", rotation=45)

    plt.tight_layout()
    plt.show()

    sweep_html = df_threshold.to_html(
        index=False, border=0,
        classes="table table-striped", float_format="{:.3f}".format,
    )
    display(HTML(sweep_html))

min_edge n_matches n_bets bet_rate n_correct_bets hit_rate total_staked gross_return net_profit roi_pct
0.000 1136 1136 1.000 554 0.488 1136.000 1047.970 -88.030 -7.749
0.020 1136 1125 0.990 552 0.491 1125.000 1043.200 -81.800 -7.271
0.050 1136 1005 0.885 506 0.503 1005.000 930.880 -74.120 -7.375
0.100 1136 755 0.665 398 0.527 755.000 698.240 -56.760 -7.518
0.150 1136 547 0.482 297 0.543 547.000 485.410 -61.590 -11.260
0.200 1136 362 0.319 208 0.575 362.000 320.120 -41.880 -11.569
0.250 1136 250 0.220 141 0.564 250.000 204.420 -45.580 -18.232
0.300 1136 162 0.143 90 0.556 162.000 122.470 -39.530 -24.401

3. ROI by region

ROI breakdown by region (country), sorted by ROI descending. ★ marks regions where the model has below-median log-loss on historical holdout data — better-calibrated predictions give a more reliable edge signal there. Regions with fewer than min_bets bets are excluded.

⚠ Limited-data caveat: per-region ROI estimates carry wide uncertainty while the total matched-match count is small. Positive-ROI regions are candidates for selective betting — not confirmed edges. Revisit once each region accumulates ≥ 50 settled bets.

Show code
# Region table: ROI per region, best first, with ★ on regions flagged
# `low_logloss` by the upstream pipeline.
if df_region is not None and not df_region.empty:
    name_col  = "region_name" if "region_name" in df_region.columns else "regionId"
    has_ll    = "logloss" in df_region.columns
    has_lowll = "low_logloss" in df_region.columns

    cols = [name_col, "n_bets", "hit_rate", "roi_pct"]
    if has_ll:
        cols.append("logloss")
    cols = [c for c in cols if c in df_region.columns]

    # Sort the full frame exactly once so the star flags travel with their rows.
    # Sorting the display columns and the flags in two separate calls only lines
    # up as long as both sorts happen to produce the same permutation.
    df_sorted = df_region.sort_values("roi_pct", ascending=False).reset_index(drop=True)
    df_reg_show = df_sorted[cols].copy()

    if has_lowll and name_col in df_reg_show.columns:
        # Missing flags count as "not starred"; astype(bool) gives a clean mask
        # even when the CSV column was read as object dtype.
        star_mask = df_sorted["low_logloss"].fillna(False).astype(bool)
        df_reg_show[name_col] = [
            f"★ {v}" if star else v
            for v, star in zip(df_reg_show[name_col].astype(str), star_mask)
        ]

    # Render numeric columns as display strings; a dash marks missing values.
    if "hit_rate" in df_reg_show.columns:
        df_reg_show["hit_rate"] = df_reg_show["hit_rate"].map(
            lambda x: f"{x:.1%}" if pd.notna(x) else "—"
        )
    if "roi_pct" in df_reg_show.columns:
        df_reg_show["roi_pct"] = df_reg_show["roi_pct"].map(
            lambda x: f"{x:+.1f}%" if pd.notna(x) else "—"
        )
    if has_ll:
        df_reg_show["logloss"] = df_reg_show["logloss"].map(
            lambda x: f"{x:.4f}" if pd.notna(x) else "—"
        )

    display(HTML(df_reg_show.to_html(index=False, border=0,
        classes="table table-striped", justify="left")))
region_name n_bets hit_rate roi_pct logloss
Japan 10 70.0% +45.1% 1.0468
Egypt 10 70.0% +42.2% 1.0181
★ Scotland 12 66.7% +34.4% 0.9980
USA 56 62.5% +22.6% 1.0349
Chile 31 54.8% +22.5% 1.0532
★ Bulgaria 19 52.6% +21.6% 0.9971
★ Netherlands 29 62.1% +18.8% 0.9922
Algeria 12 58.3% +11.5% 1.0038
Portugal 20 60.0% +9.4% 1.0044
Ecuador 15 53.3% +8.3% 1.0355
Germany 37 54.1% +7.4% 1.0204
Georgia 14 50.0% +7.1% 1.0383
Belarus 11 54.5% +3.2% 1.0068
★ Norway 61 57.4% +1.8% 0.9442
England 22 45.5% -0.2% 1.0267
Indonesia 14 57.1% -5.1% 1.0191
Israel 23 43.5% -7.0% 1.0128
Poland 46 45.7% -8.2% 1.0583
Spain 39 51.3% -10.9% 1.0098
Colombia 17 52.9% -11.6% 1.0180
Brazil 35 42.9% -11.9% 1.0350
South America 28 53.6% -11.9% 1.0291
Argentina 53 41.5% -13.9% 1.0506
Iceland 22 45.5% -18.1% 1.0209
Ireland 13 46.2% -18.6% 1.0291
Romania 14 35.7% -19.6% 1.0174
★ Sweden 47 42.6% -20.2% 0.9963
Italy 30 43.3% -21.5% 1.0332
Hungary 10 50.0% -23.0% 1.0374
★ Serbia 10 50.0% -28.8% 0.9750
France 16 37.5% -30.1% 1.0106
Iraq 12 33.3% -32.1% 1.0008
★ Finland 13 38.5% -34.9% 0.9953
★ South Africa 14 28.6% -36.2% 0.9921
★ Ukraine 13 46.2% -36.4% 0.9757
Denmark 19 36.8% -36.9% 1.0298
South Korea 15 33.3% -40.1% 1.0658
★ Czech Republic 15 33.3% -41.2% 0.9853
Belgium 15 26.7% -43.7% 1.0087
★ Bosnia-Herzegovina 12 33.3% -47.0% 0.8823
★ Undefined 23 21.7% -53.6% 0.9881
★ China 27 22.2% -60.4% 0.9937
Show code
# Horizontal bar chart of ROI per region, best region at the top.
if df_region is not None and not df_region.empty:
    name_col = "region_name" if "region_name" in df_region.columns else "regionId"
    has_lowll = "low_logloss" in df_region.columns

    df_chart = df_region.sort_values("roi_pct", ascending=False).reset_index(drop=True)
    labels = df_chart[name_col].astype(str).str[:35]
    if has_lowll:
        # Build an explicit boolean mask. Applying `~` to an object-dtype column
        # (True/False/NaN as read from CSV) inverts the values as integers, and
        # the result is not a boolean condition that Series.where accepts.
        starred = df_chart["low_logloss"].fillna(False).astype(bool)
        labels = labels.mask(starred, "★ " + labels)
    roi_vals = df_chart["roi_pct"].values
    colors = ["#27ae60" if v >= 0 else "#c0392b" for v in roi_vals]

    # Figure height scales with the number of regions so labels stay readable.
    fig, ax = plt.subplots(figsize=(10, max(5, len(df_chart) * 0.4)))
    # Reverse so the highest ROI ends up at the top of the horizontal chart.
    ax.barh(labels[::-1], roi_vals[::-1], color=colors[::-1])
    ax.axvline(0, color="black", linewidth=0.8)
    ax.set_xlabel("ROI (%)")
    title = "ROI by region (flat-stake, Fonbet odds)"
    if has_lowll:
        # No numeric threshold is printed: the median of the regions shown here
        # is not the cut-off behind `low_logloss` (regions below that median
        # appear unstarred), so quoting it would mislabel the stars.
        title += "\n★ = below-median log-loss on historical holdout data"
    ax.set_title(title)
    plt.tight_layout()
    plt.show()


4. ROI by tournament

This section drills down from the regional view above and shows only tournaments with at least 10 bets. It highlights which specific leagues drive a region’s positive or negative ROI, which is useful for identifying the most and least valuable leagues within a promising region.

Show code
# Tournament drill-down: ROI tables (top/bottom 15 when there are enough rows)
# plus a bar chart of the best and worst tournaments.
if df_segment is not None and not df_segment.empty:
    name_col = "tournament_name" if "tournament_name" in df_segment.columns else "tournamentId"
    region_col = "region_name" if "region_name" in df_segment.columns else None

    cols = [name_col]
    if region_col:
        cols.append(region_col)
    cols += ["n_bets", "hit_rate", "roi_pct"]
    cols = [c for c in cols if c in df_segment.columns]

    df_seg_show = (
        df_segment[cols]
        .sort_values("roi_pct", ascending=False)
        .reset_index(drop=True)
    )

    # Top / bottom 15
    n_show = 15
    if len(df_seg_show) > n_show * 2:
        df_top = df_seg_show.head(n_show)
        df_bot = df_seg_show.tail(n_show)
        display(Markdown(f"**Top {n_show} tournaments by ROI:**"))
        display(HTML(df_top.to_html(index=False, border=0,
            classes="table table-striped", float_format="{:.2f}".format)))
        display(Markdown(f"**Bottom {n_show} tournaments by ROI:**"))
        display(HTML(df_bot.to_html(index=False, border=0,
            classes="table table-striped", float_format="{:.2f}".format)))
    else:
        display(HTML(df_seg_show.to_html(index=False, border=0,
            classes="table table-striped", float_format="{:.2f}".format)))

    # Bar chart of ROI for top/bottom.
    # With max_bars rows or fewer, plot everything: halving the row count would
    # drop the middle row for odd counts and plot nothing for a single row.
    max_bars = 20
    if len(df_seg_show) > max_bars:
        half = max_bars // 2
        df_chart = pd.concat([df_seg_show.head(half), df_seg_show.tail(half)])
    else:
        df_chart = df_seg_show

    raw_names = df_chart[name_col]
    labels = raw_names.astype(str).str[:30].where(raw_names.notna(), "(unnamed)")
    if region_col:
        # Tournament names repeat across countries ("Premier League",
        # "Super League"), so add the region to tell them apart.
        labels = labels + " (" + df_chart[region_col].astype(str) + ")"
    colors = ["#27ae60" if v >= 0 else "#c0392b" for v in df_chart["roi_pct"]]

    # Plot against numeric positions: on a categorical string axis identical
    # labels share one position, so same-named tournaments would be drawn on
    # top of each other.
    y_pos = list(range(len(df_chart)))

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.barh(y_pos, df_chart["roi_pct"].to_numpy(), color=colors)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(labels.tolist())
    ax.axvline(0, color="black", linewidth=0.8)
    ax.set_xlabel("ROI (%)")
    ax.set_title("Top / bottom tournaments by ROI (flat-stake)")
    # Best tournament at the top.
    ax.invert_yaxis()
    plt.tight_layout()
    plt.show()

Top 15 tournaments by ROI:

tournament_name region_name n_bets hit_rate roi_pct
Primera B Chile 16 0.75 81.88
USL Championship USA 13 0.69 52.92
J- League Japan 10 0.70 45.10
Premier League Egypt 10 0.70 42.20
Tweede Divisie Netherlands 15 0.67 29.33
LaLiga Spain 18 0.61 22.06
Eliteserien Norway 18 0.67 18.39
A PFG Bulgaria 13 0.46 17.85
NWSL USA 12 0.58 17.33
I Liga Poland 15 0.47 15.67
Premier League Belarus 10 0.60 13.50
Liga Leumit Israel 12 0.50 13.33
Ligue Professionnelle 1 Algeria 12 0.58 11.50
Copa Libertadores South America 13 0.69 11.08
Major League Soccer USA 30 0.60 10.57

Bottom 15 tournaments by ROI:

tournament_name region_name n_bets hit_rate roi_pct
Copa Sudamericana South America 15 0.40 -31.87
Super League Iraq 12 0.33 -32.08
Colombia Cup 1 Colombia 12 0.42 -33.25
Allsvenskan Sweden 17 0.35 -35.00
Segunda División Spain 20 0.45 -36.20
Premier Soccer League South Africa 14 0.29 -36.21
Druha League Czech Republic 14 0.36 -37.00
K League 2 South Korea 15 0.33 -40.07
Premier League Ukraine 12 0.42 -40.67
Clausura Chile 15 0.33 -40.73
Jupiler Pro League Belgium 15 0.27 -43.67
Premier League Bosnia-Herzegovina 12 0.33 -47.00
NaN Undefined 22 0.23 -51.50
Primera B Metropolitana Argentina 18 0.22 -55.28
Super League China 12 0.17 -71.00


5. Cumulative P&L curve

Running P&L puts the regional findings in temporal context — whether the positive edge is consistent over time or concentrated in a short window. Each point is one placed bet; the curve grows as new finished matches with Fonbet odds arrive.

Show code
# Cumulative P&L: running profit and running ROI per placed bet, followed by
# the final ROI and the maximum drawdown.
# NOTE(review): the rendered row count (988) equals the Kelly bet count rather
# than the flat-stake one, while the titles below say "flat stake" — confirm
# which strategy `roi_timeseries.csv` actually holds.
if df_ts is not None and not df_ts.empty:
    if "date" in df_ts.columns:
        df_ts["date"] = pd.to_datetime(df_ts["date"], utc=True, errors="coerce")
        # Stable sort: bets sharing a timestamp keep their file order. The
        # cumulative columns are precomputed, so reordering ties would scramble
        # the curve and could change which row counts as the final one.
        df_ts = df_ts.sort_values("date", kind="stable").reset_index(drop=True)

    fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

    # x axis is the 1-based bet number, not the calendar date.
    x = df_ts.index + 1
    axes[0].plot(x, df_ts["cumulative_profit"], color="#2c3e50", linewidth=1.5)
    axes[0].axhline(0, color="gray", linewidth=0.8, linestyle="--")
    axes[0].fill_between(x, df_ts["cumulative_profit"], 0,
        where=df_ts["cumulative_profit"] >= 0, alpha=0.25, color="#27ae60")
    axes[0].fill_between(x, df_ts["cumulative_profit"], 0,
        where=df_ts["cumulative_profit"] < 0, alpha=0.25, color="#c0392b")
    axes[0].set_ylabel("Cumulative profit (units)")
    axes[0].set_title("Cumulative P&L — flat stake (1 unit per bet)")

    axes[1].plot(x, df_ts["cumulative_roi_pct"], color="#8e44ad", linewidth=1.5)
    axes[1].axhline(0, color="gray", linewidth=0.8, linestyle="--")
    axes[1].set_xlabel("Bet number")
    axes[1].set_ylabel("Cumulative ROI (%)")
    axes[1].set_title("Running ROI %")
    axes[1].yaxis.set_major_formatter(mticker.FormatStrFormatter("%.1f%%"))

    plt.tight_layout()
    plt.show()

    # Summary stats
    final_roi = float(df_ts["cumulative_roi_pct"].iloc[-1])
    # Drawdown is measured from the running peak, and the peak can never be
    # below the starting equity of 0. Without the clip, a series that opens
    # with losses understates the drawdown by the size of its first value.
    running_peak = df_ts["cumulative_profit"].cummax().clip(lower=0)
    max_drawdown = float((df_ts["cumulative_profit"] - running_peak).min())
    display(Markdown(
        f"**Final ROI:** {final_roi:+.2f}% over {len(df_ts)} bets. "
        f"**Max drawdown:** {max_drawdown:.2f} units."
    ))

Final ROI: -9.20% over 988 bets. Max drawdown: -117.07 units.


Summary

Show code
# Closing summary: headline figures as a Markdown bullet list, followed by a
# note on how to refresh the report.
if df_overall is not None:
    flat_rows = df_overall[df_overall["strategy"] == "flat_stake"]
    # na=False keeps the mask boolean even if a strategy value is missing.
    kelly_rows = df_overall[df_overall["strategy"].str.startswith("kelly", na=False)]

    # Bullets are appended only when their data exists. An empty-string
    # placeholder would survive a `None` filter and break the list apart.
    lines = []
    n_matches = None
    if not flat_rows.empty:
        flat = flat_rows.iloc[0]
        n_matches = int(flat["n_matches"])
        lines.append(f"- **Matched finished matches:** {n_matches:,}")
        lines.append(
            f"- **Bets placed (flat):** {int(flat['n_bets']):,} ({float(flat['bet_rate']):.1%} of matches)"
        )
        lines.append(f"- **Flat-stake ROI:** {float(flat['roi_pct']):+.1f}%")
        if pd.notna(flat.get("hit_rate")):
            lines.append(f"- **Hit rate:** {float(flat['hit_rate']):.1%}")

    # The growth column is optional in the CSV; skip the bullet instead of
    # raising KeyError or printing "nan%".
    if not kelly_rows.empty and "bankroll_growth_pct" in kelly_rows.columns:
        k = kelly_rows.iloc[0]
        if pd.notna(k["bankroll_growth_pct"]):
            lines.append(f"- **Kelly (f=0.25) bankroll growth:** {float(k['bankroll_growth_pct']):+.1f}%")

    # Regional edge summary
    if df_region is not None and not df_region.empty:
        name_col = "region_name" if "region_name" in df_region.columns else "regionId"
        pos_regions = df_region[df_region["roi_pct"] > 0].sort_values("roi_pct", ascending=False)
        if not pos_regions.empty:
            top_names = ", ".join(pos_regions[name_col].astype(str).head(5).tolist())
            # Warn while the overall sample is still small.
            caveat = (
                " — limited data, monitor as more matches settle."
                if n_matches is not None and n_matches < 200
                else "."
            )
            lines.append(
                f"- **Regions with positive ROI ({len(pos_regions)}):** {top_names}{caveat}"
            )

    # The blank line ends the bullet list before the blockquote.
    lines.append("")
    lines.append(
        "> This report accumulates results over time. Re-run `make live-betting` "
        "and re-render when new match data arrives."
    )
    display(Markdown("\n".join(lines)))
  • Matched finished matches: 1,136
  • Bets placed (flat): 1,136 (100.0% of matches)
  • Flat-stake ROI: -7.7%
  • Hit rate: 48.8%
  • Kelly (f=0.25) bankroll growth: -86.7%
  • Regions with positive ROI (14): Japan, Egypt, Scotland, USA, Chile.

This report accumulates results over time. Re-run make live-betting and re-render when new match data arrives.