From 667bdc8213c40249502084c244f1de5f4ed9eac1 Mon Sep 17 00:00:00 2001
From: marques <gitea@marques22.com>
Date: Wed, 19 Nov 2025 16:27:04 +0800
Subject: [PATCH] Add PPG signal processing and SpO2 calculation modules

---
 PPG2SpO2.py              | 114 ++++++++++++++++++
 func.py                  | 243 +++++++++++++++++++++++++++++++++++++++
 process_spo2_alg_info.py | 100 ++++++++++++++++
 process_spo2_hr.py       |  52 +++++++++
 spo2_pipeline.py         | 114 ++++++++++++++++++
 5 files changed, 623 insertions(+)
 create mode 100644 PPG2SpO2.py
 create mode 100644 func.py
 create mode 100644 process_spo2_alg_info.py
 create mode 100644 process_spo2_hr.py
 create mode 100644 spo2_pipeline.py

diff --git a/PPG2SpO2.py b/PPG2SpO2.py
new file mode 100644
index 0000000..fb80a31
--- /dev/null
+++ b/PPG2SpO2.py
@@ -0,0 +1,114 @@
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+import os
+os.environ["DISPLAY"] = "localhost:10.0"
+from func import calculate_hr_spo2_zhihu, ppg2spo2_pipeline, func_1
+from spo2_pipeline import spo2_pipeline
+
+def read_data(file_path):
+    """
+    Read data from a file and return it as a list of floats.
+
+    Args:
+        file_path (str or Path): The path to the file to read.
+    Returns:
+        list of float: The data read from the file.
+    """
+    df = pd.read_csv(file_path)
+    # red = df['red'].values[300:]
+    # ir = df['ied'].values[300:]
+    red = df['red'].values[50:]
+    ir = df['ied'].values[50:]
+    return red, ir
+
+
+def read_labels(file_path):
+    """
+    Read labels from a file and return them as a list of integers.
+
+    Args:
+        file_path (str or Path): The path to the file to read.
+    Returns:
+        list of int: The labels read from the file.
+    """
+    df = pd.read_csv(file_path)
+    timestamp = df['timestamp'].values
+    hr_label = df['心率_值'].values
+    spo2_label = df['血氧组_值'].values.astype(float)
+    r_value = df['R组_均方根'].values
+
+    np.place(spo2_label, spo2_label == 0, np.nan)
+
+    return timestamp, hr_label, spo2_label, r_value
+
+
+def draw_compare(red_signal, ir_signal, spo2_values, hr_values, r_values, fs=25):
+    """
+    Draw comparison plots for red and IR signals along with SpO2 values.
+
+    Args:
+        red_signal (array-like): The red light signal data.
+        ir_signal (array-like): The infrared light signal data.
+        spo2_values (array-like): The computed SpO2 values.
+        fs (int): Sampling frequency in Hz. Default is 25.
+    """
+    time_axis = [i / fs for i in range(len(red_signal))]
+
+    plt.figure(figsize=(15, 10))
+
+    plt.subplot(3, 1, 1)
+    plt.plot(time_axis, red_signal, label='Red Signal', color='red')
+    plt.plot(time_axis, ir_signal, label='IR Signal', color='blue')
+    plt.title('Red and IR Signals')
+    plt.xlabel('Time (s)')
+    plt.ylabel('Amplitude')
+    plt.legend()
+
+    plt.subplot(3, 1, 2)
+    plt.plot(spo2_values, label='Computed SpO2', color='green')
+    plt.title('Computed SpO2 Values')
+    plt.xlabel('Beats')
+    plt.ylabel('SpO2 (%)')
+    plt.legend()
+
+    plt.subplot(3, 1, 3)
+    plt.plot(hr_values, label='Computed Heart Rate', color='orange')
+    plt.title('Computed Heart Rate Values')
+    plt.xlabel('Beats')
+    plt.ylabel('Heart Rate (bpm)')
+    plt.legend()
+
+    plt.tight_layout()
+    plt.show()
+
+
+
+
+if __name__ == '__main__':
+    file_path = Path("./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.csv")
+    # file_path = "./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.csv"
+    red_signal, ir_signal = read_data(file_path)
+
+    label_path = file_path.parent / "spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.csv"
+    timestamp, hr_label, spo2_label, r_value = read_labels(label_path)
+
+    # draw_compare(red_signal, ir_signal, spo2_label, hr_label, r_value, fs=25)
+    # df, summary = spo2_pipeline(
+    #     red_signal,
+    #     ir_signal,
+    #     fs=25,
+    #     calibrate_with_ref=spo2_label,
+    #     A=104.0,
+    #     B=17.0
+    # )
+    # print(summary)
+    # print(df)
+    # HR_result, SpO2_result = ppg2spo2_pipeline(red_signal, ir_signal, fs=25)
+
+    func_1(red_signal, ir_signal, fs=25)
+
+
+
diff --git a/func.py b/func.py
new file mode 100644
index 0000000..803131c
--- /dev/null
+++ b/func.py
@@ -0,0 +1,243 @@
+import numpy as np
+import pandas as pd
+from scipy import signal
+from matplotlib import pyplot as plt
+
+def calculate_hr_spo2_zhihu(X, fs, FFT_size=512):
+    """
+    X          : ndarray, shape (N, 2)   # 第0列 RED，第1列 IR
+    fs         : 采样率 (Hz)
+    FFT_size   : FFT 点数（建议 2 的幂）
+    返回值     : (Heart_Rate, SpO2_Level)  已经取整
+    https://zhuanlan.zhihu.com/p/658858641
+    https://github.com/thinkng/ppgprocessing/blob/master/HR_SpO2_Estimation.m
+    """
+    X = np.asarray(X)
+    if X.ndim != 2 or X.shape[1] != 2:
+        raise ValueError("X 必须是 (样本数, 2) 的数组，列0=RED，列1=IR")
+
+    # 计算能处理的完整窗口数量（每个窗口长度 = FFT_size）
+    step = fs                      # MATLAB 中是 n*fs 开始，步长为 fs（1 秒）
+    n_windows = int((len(X) / (2 * fs)) - 2)   # 与原 MATLAB 一致
+
+    HEART_RATE = np.zeros(n_windows)
+    SpO2       = np.zeros(n_windows)
+
+    for n in range(n_windows):
+        start_idx = n * step
+        end_idx   = start_idx + FFT_size
+        y1 = X[start_idx:end_idx, 0]   # RED
+        y2 = X[start_idx:end_idx, 1]   # IR
+
+        # ---------- FFT RED ----------
+        Y1 = np.fft.fft(y1, n=FFT_size)
+        Y1_abs = np.abs(Y1[:FFT_size//2 + 1])
+        f1 = fs / 2 * np.linspace(0, 1, FFT_size//2 + 1)
+
+        # ---------- FFT IR ----------
+        Y2 = np.fft.fft(y2, n=FFT_size)
+        Y2_abs = np.abs(Y2[:FFT_size//2 + 1])
+        f2 = f1.copy()   # 与 f1 完全相同
+
+        # ---------- 在 0.5~2.5 Hz（对应心率 30~150 bpm）范围内找局部最大 ----------
+        # MATLAB 中索引 6:12 对应频率大约 0.6~1.4 Hz（取决于 FFT_size/fs）
+        # 这里统一取频率 0.5~2.5 Hz 对应的索引
+        idx_range = np.where((f1 >= 0.5) & (f1 <= 2.5))[0]
+
+        # RED 峰值索引
+        segment_red = Y1_abs[idx_range]
+        local_max_i = np.argmax(segment_red)
+        pk_RED_i = idx_range[local_max_i]
+
+        # IR 峰值索引
+        segment_ir = Y2_abs[idx_range]
+        local_max_i = np.argmax(segment_ir)
+        pk_IR_i = idx_range[local_max_i]
+
+        # ---------- 心率 ----------
+        heart_rate_bpm = f2[pk_IR_i] * 60
+        HEART_RATE[n] = heart_rate_bpm
+
+        # ---------- SpO2 ----------
+        R_RED = Y1_abs[pk_RED_i] / (Y1_abs[0] + 1e-12)   # 防止除以 0
+        R_IR  = Y2_abs[pk_IR_i]  / (Y2_abs[0]  + 1e-12)
+        R = R_RED / R_IR
+        spo2 = 104 - 28 * R
+        SpO2[n] = spo2
+
+    # 去掉首尾（与原 MATLAB 相同）
+    if len(HEART_RATE) > 2:
+        HR_mean = np.mean(HEART_RATE[1:-1])
+        SpO2_mean = np.mean(SpO2[1:-1])
+    else:
+        HR_mean = np.mean(HEART_RATE)
+        SpO2_mean = np.mean(SpO2)
+
+    Heart_Rate = round(HR_mean)
+    SpO2_Level = round(SpO2_mean)
+
+    return Heart_Rate, SpO2_Level
+
+
+
+def _culculate_spo2(ir_list_data, red_list_data):
+    ir_dc = min(ir_list_data)
+    red_dc = min(red_list_data)
+    ir_ac = max(ir_list_data) - ir_dc
+    red_ac = max(red_list_data) - red_dc
+    temp1 = ir_ac * red_dc
+    if temp1 < 1:
+        temp1 = 1
+    R2 = (red_ac * ir_dc) / temp1
+    SPO2 = -45.060 * R2 * R2 + 30.354 * R2 + 94.845
+    if SPO2 > 100 or SPO2 < 0:
+        SPO2 = 0
+    return SPO2
+
+def _culculate_HR(ir_list_data_filtered, data_list_time):
+    HR_num = signal.find_peaks(ir_list_data_filtered, distance=10)[0]
+    time = data_list_time[-1] -data_list_time[0]
+    HR = len(HR_num) / (time / 1000) * 60
+    return HR
+
+
+def process_signal(signal_segment, fs, highpass=True):
+    if highpass:
+        h_b, h_a = signal.butter(N=8, Wn=1/(fs/2), btype="highpass", output="ba")
+        data = signal.filtfilt(h_b, h_a, signal_segment, axis=0)
+    else:
+        data = signal_segment
+    data = signal.detrend(data,
+                          axis=0,
+                          type='linear',
+                          bp=0,
+                            overwrite_data=False)
+    return data
+
+
+def ppg2spo2_pipeline(red, ir, fs=25):
+    """
+    采用滑窗分析法计算心率和血氧饱和度
+    每秒中输出结果
+
+    red : ndarray, 红光信号
+    ir  : ndarray, 红外光信号
+    fs  : 采样率 (Hz)
+    """
+
+    red = np.asarray(red).reshape(-1)
+    ir  = np.asarray(ir).reshape(-1)
+
+
+    if len(red) != len(ir):
+        raise ValueError("红光和红外光信号长度必须相同")
+
+    red_filtered = process_signal(red, fs, highpass=False)
+    ir_filtered  = process_signal(ir, fs, highpass=True)
+
+    bpm_list_data = []
+    spo2_list_data = []
+    temp_bpm_list_data = []
+    temp_spo2_list_data = []
+
+    for i in range(len(red)//fs - 1):
+        red_segment = red_filtered[i*fs:(i+1)*fs]
+        ir_segment  = ir_filtered[i*fs:(i+1)*fs]
+        spo2 = _culculate_spo2(ir_segment, red_segment)
+        bpm = _culculate_HR(ir_segment, np.arange(i*fs, (i+1)*fs) * (1000/fs))
+        temp_bpm_list_data.append(bpm)
+        temp_spo2_list_data.append(spo2)
+    # matlab
+
+    # python
+
+
+    plt.figure(figsize=(10, 5))
+    timestamp = np.linspace(0, len(red_filtered)/fs, len(red_filtered))
+    ax1 = plt.subplot(211)
+    plt.plot( timestamp, red, label='Red Signal', color='red', alpha=0.5)
+    plt.plot(timestamp,ir, label='IR Signal', color='blue', alpha=0.5)
+    plt.title('Raw PPG Signals')
+    plt.xlabel("seconds")
+    plt.ylabel('Amplitude')
+    plt.legend()
+    plt.subplot(212, sharex=ax1)
+    plt.plot(timestamp,red_filtered, label='Filtered Red Signal', color='red', alpha=0.5)
+    plt.plot(timestamp,ir_filtered, label='Filtered IR Signal', color='blue', alpha=0.5)
+    plt.title('Filtered PPG Signals')
+    plt.xlabel("seconds")
+    plt.ylabel('Amplitude')
+    plt.legend()
+    plt.show()
+
+
+
+
+def func_1(red, ir, fs=25):
+    # Processing_PPG_Signal
+    # make a move window find min and max of ArrayIR
+    def movmin1(A, k):
+        x = A.rolling(k, min_periods=1, center=True).min().to_numpy()  #
+        return x
+
+    def movmax1(A, k):
+        x = A.rolling(k, min_periods=1, center=True).max().to_numpy()
+        return x
+
+    ArrayIR = pd.DataFrame(ir)
+    ArrayRed = pd.DataFrame(red)
+
+    # calculate ac/dc ir
+    max_ir = movmax1(ArrayIR, fs)
+    # print(f"max_ir: {max_ir}")
+    min_ir = movmin1(ArrayIR, fs)
+    # print(f"min_ir: {min_ir}")
+    baseline_data_ir = (max_ir + min_ir) / 2
+    # print(f"baseline_data_ir: {baseline_data_ir}")
+    acDivDcIr = (max_ir - min_ir) / baseline_data_ir
+
+    # calculate ac/dc red
+    max_red = movmax1(ArrayRed, fs)
+    min_red = movmin1(ArrayRed, fs)
+    baseline_data_red = (max_red + min_red) / 2
+    acDivDcRed = (max_red - min_red) / baseline_data_red
+
+    # Plot SPO2 = 110-25*(ac/dc_red)/(ac/dc_ir)
+    SPO2 = 110 - 25 * (acDivDcRed / acDivDcIr)
+    # plt.figure("SPO2")
+    timestamp = np.linspace(0, len(red) / fs, len(red))
+    plt.figure(figsize=(10, 5))
+    ax1 = plt.subplot(311)
+    plt.plot(timestamp, red, label='Red Signal', color='red', alpha=0.5)
+    plt.plot(timestamp, ir, label='IR Signal', color='blue', alpha=0.5)
+    plt.title('Raw PPG Signals')
+    plt.xlabel("seconds")
+    plt.ylabel('Amplitude')
+    plt.legend()
+
+    plt.subplot(312, sharex=ax1)
+    plt.plot(timestamp, red - baseline_data_red, label='Detrended Red Signal', color='red', alpha=0.5)
+    plt.plot(timestamp, ir - baseline_data_ir, label='Detrended IR Signal', color='blue', alpha=0.5)
+    plt.title('Detrended PPG Signals')
+    plt.xlabel("seconds")
+    plt.ylabel('Amplitude')
+    plt.legend()
+
+    plt.subplot(313, sharex=ax1)
+    plt.plot(timestamp,acDivDcRed, label='AC/DC Red', color='red', alpha=0.5)
+    plt.plot(timestamp,acDivDcIr, label='AC/DC IR', color='blue', alpha=0.5)
+    plt.title('AC/DC Ratios')
+    plt.xlabel("seconds")
+    plt.ylabel('Ratio')
+    plt.legend()
+    plt.show()
+    plt.xlabel("Samples")
+    plt.ylabel("SPO2")
+    plt.title("SPO2")
+    plt.plot(timestamp, SPO2)
+    plt.show()
+
+
+
+
+
diff --git a/process_spo2_alg_info.py b/process_spo2_alg_info.py
new file mode 100644
index 0000000..6d323e8
--- /dev/null
+++ b/process_spo2_alg_info.py
@@ -0,0 +1,100 @@
+import re
+import pandas as pd
+from pathlib import Path
+from typing import List, Dict
+
+
+def parse_line(line: str) -> Dict:
+    """
+    解析一行数据，返回一个字典
+    例如输入:
+    [300]心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1     血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1     R组:;均方根:0;     仪器:60
+[325]心率:75,78,83,83,;标准差1:38.6641;标准差2:3.4187;均方根:80.3232;值:81;信任级别:1     血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:2     R组:;均方根:0;     仪器:60
+[350]心率:78,83,83,83,;标准差1:46.0893;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1     血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3     R组:;均方根:0;     仪器:60
+[375]心率:83,83,83,107,;标准差1:43.8422;标准差2:10.3923;均方根:90.1047;值:82;信任级别:0     血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3     R组:;均方根:0;     仪器:60
+[400]心率:83,83,107,107,;标准差1:47.7501;标准差2:12;均方根:96.2549;值:82;信任级别:0     血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3     R组:;均方根:0;     仪器:60
+    """
+    line = line.strip()
+    if not line:
+        return None
+
+    # 提取时间戳 [300] 这样的
+    time_match = re.match(r'\[(\d+)\]', line)
+    if not time_match:
+        return None
+    timestamp = int(time_match.group(1))
+
+    # 去掉时间戳部分，后面全部是键值对
+    content = line[time_match.end():]
+    content = content.split("     ")
+
+    result = {"timestamp": timestamp}
+
+    for block in content:
+        parsed_block = parse_block(block)
+        result.update(parsed_block)
+    return result
+
+
+def parse_block(block: str) -> Dict:
+    """
+    解析一个数据块，返回一个字典
+    例如输入:
+    心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1
+    血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1
+    R组:;均方根:0;
+    仪器:60
+    """
+    data = {}
+    parts = block.split(';')
+    if len(parts) == 1:
+        key, value = parts[0].split(':', 1)
+        data[key.strip()] = value.strip()
+    else:
+        key_0 = parts[0].split(':', 1)[0].strip()
+        for part in parts:
+            if len(part.strip()) == 0:
+                continue
+            if "结果延迟" in part:
+                part0, key1, value1 = part.partition('结果延迟:')
+                data[f"{key_0}_结果延迟"] = value1.strip()
+                key2, _, value2 = part0.partition(':')
+                if key2 and value2:
+                    data[f"{key_0}_{key2.strip()}"] = value2.strip()
+            else:
+                key, value = part.split(':', 1)
+                data[f"{key_0}_{key.strip()}"] = value.strip()
+
+    return data
+
+
+
+def parse_dat_file(file_path) -> pd.DataFrame:
+    """
+    读取整个 .dat 文件并返回 pandas DataFrame
+    """
+    data = []
+    with open(file_path, 'r', encoding='utf-8') as f:
+        for line_num, line in enumerate(f, 1):
+            parsed = parse_line(line)
+            if parsed:
+                data.append(parsed)
+            else:
+                if line.strip():
+                    print(f"第 {line_num} 行解析失败，已跳过: {line.strip()[:80]}...")
+
+    df = pd.DataFrame(data)
+
+    # 按时间戳排序（一般已经是顺序，但保险起见）
+    if not df.empty:
+        df = df.sort_values("timestamp").reset_index(drop=True)
+
+    return df
+
+if __name__ == '__main__':
+    # file_path = "./data/spo2_alg_info[42-A3-C5-2F-F7-32]2025.11.17_17.01.14.dat"
+    file_path = "./data/spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.dat"
+    file_path = Path(file_path)
+    df = parse_dat_file(file_path)
+
+    df.to_csv(file_path.with_suffix('.csv'), index=False, encoding='utf-8-sig')
\ No newline at end of file
diff --git a/process_spo2_hr.py b/process_spo2_hr.py
new file mode 100644
index 0000000..36d1dc4
--- /dev/null
+++ b/process_spo2_hr.py
@@ -0,0 +1,52 @@
+'''样例数据
+red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169
+red:9185136;ied:10744763;accX:209;accY:-438;accZ:-171
+red:9182875;ied:10744312;accX:209;accY:-438;accZ:-171
+red:9181788;ied:10744212;accX:209;accY:-441;accZ:-163
+red:9180729;ied:10744341;accX:209;accY:-441;accZ:-163
+red:9179405;ied:10742269;accX:212;accY:-440;accZ:-172
+red:9178563;ied:10741796;accX:212;accY:-440;accZ:-172
+red:9178557;ied:10742843;accX:209;accY:-440;accZ:-166
+red:9178185;ied:10742777;accX:209;accY:-440;accZ:-166
+red:9177053;ied:10737766;accX:210;accY:-440;accZ:-166
+'''
+
+import numpy as np
+import pandas as pd
+from pathlib import Path
+def parse_dat_line(line: str) -> dict:
+    """
+    解析一行 .dat 文件内容，返回一个字典
+    例如输入:
+    red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169
+    """
+    data = {}
+    parts = line.strip().split(';')
+    for part in parts:
+        if len(part.strip()) == 0:
+            continue
+        key, value = part.split(':', 1)
+        data[key.strip()] = int(value.strip())
+    return data
+def parse_dat_file(file_path) -> pd.DataFrame:
+    """
+    读取整个 .dat 文件并返回 pandas DataFrame
+    :param file_path:
+    :return:
+    """
+    records = []
+    with open(file_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            parsed_line = parse_dat_line(line)
+            records.append(parsed_line)
+    df = pd.DataFrame(records)
+    return df
+
+if __name__ == '__main__':
+    # file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.dat'
+    file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.dat'
+    file_path = Path(file_path)
+    df = parse_dat_file(file_path)
+
+
+    df.to_csv(file_path.with_suffix('.csv'), index=False)
\ No newline at end of file
diff --git a/spo2_pipeline.py b/spo2_pipeline.py
new file mode 100644
index 0000000..a036c46
--- /dev/null
+++ b/spo2_pipeline.py
@@ -0,0 +1,114 @@
+
+import numpy as np
+import pandas as pd
+from scipy.signal import butter, filtfilt, find_peaks
+
+def bandpass_filter(x, fs, low=0.5, high=5.0, order=3):
+    nyq = 0.5 * fs
+    lown = low / nyq
+    highn = high / nyq
+    b, a = butter(order, [lown, highn], btype='band')
+    y = filtfilt(b, a, x)
+    return y
+
+def detrend(x):
+    return x - np.mean(x)
+
+def detect_peaks(signal, fs, hr_min=40, hr_max=220):
+    min_dist = int(np.floor(fs * 60.0 / hr_max))
+    prominence = 0.3 * np.std(signal)
+    peaks, props = find_peaks(signal, distance=min_dist, prominence=prominence)
+    return peaks, props
+
+def compute_ac_dc_per_beat(signal, peaks, fs):
+    n = len(signal)
+    AC, DC, times, beat_windows = [], [], [], []
+    for i, p in enumerate(peaks):
+        if i == 0:
+            start = 0
+        else:
+            start = (peaks[i-1] + p) // 2
+        if i == len(peaks)-1:
+            end = n-1
+        else:
+            end = (p + peaks[i+1]) // 2
+        segment = signal[start:end+1]
+        if segment.size < 3:
+            AC.append(np.nan); DC.append(np.nan)
+            times.append(p / fs)
+            beat_windows.append((start, end))
+            continue
+        seg_max = np.max(segment)
+        seg_min = np.min(segment)
+        ac = (seg_max - seg_min) / 2.0
+        dc = np.mean(segment)
+        AC.append(ac); DC.append(dc)
+        times.append(p / fs)
+        beat_windows.append((start, end))
+    return np.array(AC), np.array(DC), np.array(times), beat_windows
+
+def compute_spo2_from_R(R, A=104.0, B=17.0):
+    return A - B * R
+
+def calibrate_linear(R_vals, spo2_ref):
+    mask = ~np.isnan(R_vals) & ~np.isnan(spo2_ref)
+    if mask.sum() < 2:
+        return None
+    coeffs = np.polyfit(R_vals[mask], spo2_ref[mask], 1)
+    c1, c0 = coeffs[0], coeffs[1]
+    A = c0; B = -c1
+    return float(A), float(B)
+
+def spo2_pipeline(red, ir, fs=25, calibrate_with_ref=None, A=104.0, B=17.0):
+    assert red.shape == ir.shape, "red and ir must have same shape"
+    red_f = bandpass_filter(red, fs)
+    ir_f = bandpass_filter(ir, fs)
+    red_d = detrend(red_f)
+    ir_d = detrend(ir_f)
+    peaks, props = detect_peaks(ir_d, fs)
+    ac_red, dc_red, times, _ = compute_ac_dc_per_beat(red_d, peaks, fs)
+    ac_ir, dc_ir, _, _ = compute_ac_dc_per_beat(ir_d, peaks, fs)
+
+    with np.errstate(divide='ignore', invalid='ignore'):
+        ratio_red = ac_red / dc_red
+        ratio_ir  = ac_ir  / dc_ir
+        R = ratio_red / ratio_ir
+
+    spo2_vals = compute_spo2_from_R(R, A=A, B=B)
+    used_A, used_B = A, B
+
+    # if calibrate_with_ref is not None:
+    #     if len(calibrate_with_ref) != len(R):
+    #         raise ValueError("Reference SpO2 length mismatch with beats.")
+    #     ref = np.array(calibrate_with_ref)
+    #     fit = calibrate_linear(R, ref)
+    #     if fit is not None:
+    #         used_A, used_B = fit
+    #         spo2_vals = compute_spo2_from_R(R, A=used_A, B=used_B)
+
+    df = pd.DataFrame({
+        "time_s": times,
+        "AC_red": ac_red,
+        "DC_red": dc_red,
+        "AC_ir": ac_ir,
+        "DC_ir": dc_ir,
+        "R": R,
+        "SpO2": spo2_vals
+    })
+
+    df['valid'] = (
+        (~np.isnan(df['R'])) &
+        (df['AC_red']>0) & (df['AC_ir']>0) &
+        (df['DC_red']>0) & (df['DC_ir']>0) &
+        (df['SpO2']>50) & (df['SpO2']<100)
+    )
+
+    summary = {
+        "n_beats": len(df),
+        "n_valid_beats": int(df['valid'].sum()),
+        "mean_spo2": float(np.nanmean(df['SpO2'][df['valid']])) if df['valid'].any() else float('nan'),
+        "median_spo2": float(np.nanmedian(df['SpO2'][df['valid']])) if df['valid'].any() else float('nan'),
+        "used_A": used_A,
+        "used_B": used_B
+    }
+    return df, summary