From 667bdc8213c40249502084c244f1de5f4ed9eac1 Mon Sep 17 00:00:00 2001 From: marques Date: Wed, 19 Nov 2025 16:27:04 +0800 Subject: [PATCH] Add PPG signal processing and SpO2 calculation modules --- PPG2SpO2.py | 114 ++++++++++++++++++ func.py | 243 +++++++++++++++++++++++++++++++++++++++ process_spo2_alg_info.py | 100 ++++++++++++++++ process_spo2_hr.py | 52 +++++++++ spo2_pipeline.py | 114 ++++++++++++++++++ 5 files changed, 623 insertions(+) create mode 100644 PPG2SpO2.py create mode 100644 func.py create mode 100644 process_spo2_alg_info.py create mode 100644 process_spo2_hr.py create mode 100644 spo2_pipeline.py diff --git a/PPG2SpO2.py b/PPG2SpO2.py new file mode 100644 index 0000000..fb80a31 --- /dev/null +++ b/PPG2SpO2.py @@ -0,0 +1,114 @@ +from pathlib import Path + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +import os +os.environ["DISPLAY"] = "localhost:10.0" +from func import calculate_hr_spo2_zhihu, ppg2spo2_pipeline, func_1 +from spo2_pipeline import spo2_pipeline + +def read_data(file_path): + """ + Read data from a file and return it as a list of floats. + + Args: + file_path (str or Path): The path to the file to read. + Returns: + list of float: The data read from the file. + """ + df = pd.read_csv(file_path) + # red = df['red'].values[300:] + # ir = df['ied'].values[300:] + red = df['red'].values[50:] + ir = df['ied'].values[50:] + return red, ir + + +def read_labels(file_path): + """ + Read labels from a file and return them as a list of integers. + + Args: + file_path (str or Path): The path to the file to read. + Returns: + list of int: The labels read from the file. + """ + df = pd.read_csv(file_path) + timestamp = df['timestamp'].values + hr_label = df['心率_值'].values + spo2_label = df['血氧组_值'].values.astype(float) + r_value = df['R组_均方根'].values + + np.place(spo2_label, spo2_label == 0, np.nan) + + return timestamp, hr_label, spo2_label, r_value + + +def draw_compare(red_signal, ir_signal, spo2_values, hr_values, r_values, fs=25): + """ + Draw comparison plots for red and IR signals along with SpO2 values. + + Args: + red_signal (array-like): The red light signal data. + ir_signal (array-like): The infrared light signal data. + spo2_values (array-like): The computed SpO2 values. + fs (int): Sampling frequency in Hz. Default is 25. + """ + time_axis = [i / fs for i in range(len(red_signal))] + + plt.figure(figsize=(15, 10)) + + plt.subplot(3, 1, 1) + plt.plot(time_axis, red_signal, label='Red Signal', color='red') + plt.plot(time_axis, ir_signal, label='IR Signal', color='blue') + plt.title('Red and IR Signals') + plt.xlabel('Time (s)') + plt.ylabel('Amplitude') + plt.legend() + + plt.subplot(3, 1, 2) + plt.plot(spo2_values, label='Computed SpO2', color='green') + plt.title('Computed SpO2 Values') + plt.xlabel('Beats') + plt.ylabel('SpO2 (%)') + plt.legend() + + plt.subplot(3, 1, 3) + plt.plot(hr_values, label='Computed Heart Rate', color='orange') + plt.title('Computed Heart Rate Values') + plt.xlabel('Beats') + plt.ylabel('Heart Rate (bpm)') + plt.legend() + + plt.tight_layout() + plt.show() + + + + +if __name__ == '__main__': + file_path = Path("./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.csv") + # file_path = "./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.csv" + red_signal, ir_signal = read_data(file_path) + + label_path = file_path.parent / "spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.csv" + timestamp, hr_label, spo2_label, r_value = read_labels(label_path) + + # draw_compare(red_signal, ir_signal, spo2_label, hr_label, r_value, fs=25) + # df, summary = spo2_pipeline( + # red_signal, + # ir_signal, + # fs=25, + # calibrate_with_ref=spo2_label, + # A=104.0, + # B=17.0 + # ) + # print(summary) + # print(df) + # HR_result, SpO2_result = ppg2spo2_pipeline(red_signal, ir_signal, fs=25) + + func_1(red_signal, ir_signal, fs=25) + + + diff --git a/func.py b/func.py new file mode 100644 index 0000000..803131c --- /dev/null +++ b/func.py @@ -0,0 +1,243 @@ +import numpy as np +import pandas as pd +from scipy import signal +from matplotlib import pyplot as plt + +def calculate_hr_spo2_zhihu(X, fs, FFT_size=512): + """ + X : ndarray, shape (N, 2) # 第0列 RED,第1列 IR + fs : 采样率 (Hz) + FFT_size : FFT 点数(建议 2 的幂) + 返回值 : (Heart_Rate, SpO2_Level) 已经取整 + https://zhuanlan.zhihu.com/p/658858641 + https://github.com/thinkng/ppgprocessing/blob/master/HR_SpO2_Estimation.m + """ + X = np.asarray(X) + if X.ndim != 2 or X.shape[1] != 2: + raise ValueError("X 必须是 (样本数, 2) 的数组,列0=RED,列1=IR") + + # 计算能处理的完整窗口数量(每个窗口长度 = FFT_size) + step = fs # MATLAB 中是 n*fs 开始,步长为 fs(1 秒) + n_windows = int((len(X) / (2 * fs)) - 2) # 与原 MATLAB 一致 + + HEART_RATE = np.zeros(n_windows) + SpO2 = np.zeros(n_windows) + + for n in range(n_windows): + start_idx = n * step + end_idx = start_idx + FFT_size + y1 = X[start_idx:end_idx, 0] # RED + y2 = X[start_idx:end_idx, 1] # IR + + # ---------- FFT RED ---------- + Y1 = np.fft.fft(y1, n=FFT_size) + Y1_abs = np.abs(Y1[:FFT_size//2 + 1]) + f1 = fs / 2 * np.linspace(0, 1, FFT_size//2 + 1) + + # ---------- FFT IR ---------- + Y2 = np.fft.fft(y2, n=FFT_size) + Y2_abs = np.abs(Y2[:FFT_size//2 + 1]) + f2 = f1.copy() # 与 f1 完全相同 + + # ---------- 在 0.5~2.5 Hz(对应心率 30~150 bpm)范围内找局部最大 ---------- + # MATLAB 中索引 6:12 对应频率大约 0.6~1.4 Hz(取决于 FFT_size/fs) + # 这里统一取频率 0.5~2.5 Hz 对应的索引 + idx_range = np.where((f1 >= 0.5) & (f1 <= 2.5))[0] + + # RED 峰值索引 + segment_red = Y1_abs[idx_range] + local_max_i = np.argmax(segment_red) + pk_RED_i = idx_range[local_max_i] + + # IR 峰值索引 + segment_ir = Y2_abs[idx_range] + local_max_i = np.argmax(segment_ir) + pk_IR_i = idx_range[local_max_i] + + # ---------- 心率 ---------- + heart_rate_bpm = f2[pk_IR_i] * 60 + HEART_RATE[n] = heart_rate_bpm + + # ---------- SpO2 ---------- + R_RED = Y1_abs[pk_RED_i] / (Y1_abs[0] + 1e-12) # 防止除以 0 + R_IR = Y2_abs[pk_IR_i] / (Y2_abs[0] + 1e-12) + R = R_RED / R_IR + spo2 = 104 - 28 * R + SpO2[n] = spo2 + + # 去掉首尾(与原 MATLAB 相同) + if len(HEART_RATE) > 2: + HR_mean = np.mean(HEART_RATE[1:-1]) + SpO2_mean = np.mean(SpO2[1:-1]) + else: + HR_mean = np.mean(HEART_RATE) + SpO2_mean = np.mean(SpO2) + + Heart_Rate = round(HR_mean) + SpO2_Level = round(SpO2_mean) + + return Heart_Rate, SpO2_Level + + + +def _culculate_spo2(ir_list_data, red_list_data): + ir_dc = min(ir_list_data) + red_dc = min(red_list_data) + ir_ac = max(ir_list_data) - ir_dc + red_ac = max(red_list_data) - red_dc + temp1 = ir_ac * red_dc + if temp1 < 1: + temp1 = 1 + R2 = (red_ac * ir_dc) / temp1 + SPO2 = -45.060 * R2 * R2 + 30.354 * R2 + 94.845 + if SPO2 > 100 or SPO2 < 0: + SPO2 = 0 + return SPO2 + +def _culculate_HR(ir_list_data_filtered, data_list_time): + HR_num = signal.find_peaks(ir_list_data_filtered, distance=10)[0] + time = data_list_time[-1] -data_list_time[0] + HR = len(HR_num) / (time / 1000) * 60 + return HR + + +def process_signal(signal_segment, fs, highpass=True): + if highpass: + h_b, h_a = signal.butter(N=8, Wn=1/(fs/2), btype="highpass", output="ba") + data = signal.filtfilt(h_b, h_a, signal_segment, axis=0) + else: + data = signal_segment + data = signal.detrend(data, + axis=0, + type='linear', + bp=0, + overwrite_data=False) + return data + + +def ppg2spo2_pipeline(red, ir, fs=25): + """ + 采用滑窗分析法计算心率和血氧饱和度 + 每秒中输出结果 + + red : ndarray, 红光信号 + ir : ndarray, 红外光信号 + fs : 采样率 (Hz) + """ + + red = np.asarray(red).reshape(-1) + ir = np.asarray(ir).reshape(-1) + + + if len(red) != len(ir): + raise ValueError("红光和红外光信号长度必须相同") + + red_filtered = process_signal(red, fs, highpass=False) + ir_filtered = process_signal(ir, fs, highpass=True) + + bpm_list_data = [] + spo2_list_data = [] + temp_bpm_list_data = [] + temp_spo2_list_data = [] + + for i in range(len(red)//fs - 1): + red_segment = red_filtered[i*fs:(i+1)*fs] + ir_segment = ir_filtered[i*fs:(i+1)*fs] + spo2 = _culculate_spo2(ir_segment, red_segment) + bpm = _culculate_HR(ir_segment, np.arange(i*fs, (i+1)*fs) * (1000/fs)) + temp_bpm_list_data.append(bpm) + temp_spo2_list_data.append(spo2) + # matlab + + # python + + + plt.figure(figsize=(10, 5)) + timestamp = np.linspace(0, len(red_filtered)/fs, len(red_filtered)) + ax1 = plt.subplot(211) + plt.plot( timestamp, red, label='Red Signal', color='red', alpha=0.5) + plt.plot(timestamp,ir, label='IR Signal', color='blue', alpha=0.5) + plt.title('Raw PPG Signals') + plt.xlabel("seconds") + plt.ylabel('Amplitude') + plt.legend() + plt.subplot(212, sharex=ax1) + plt.plot(timestamp,red_filtered, label='Filtered Red Signal', color='red', alpha=0.5) + plt.plot(timestamp,ir_filtered, label='Filtered IR Signal', color='blue', alpha=0.5) + plt.title('Filtered PPG Signals') + plt.xlabel("seconds") + plt.ylabel('Amplitude') + plt.legend() + plt.show() + + + + +def func_1(red, ir, fs=25): + # Processing_PPG_Signal + # make a move window find min and max of ArrayIR + def movmin1(A, k): + x = A.rolling(k, min_periods=1, center=True).min().to_numpy() # + return x + + def movmax1(A, k): + x = A.rolling(k, min_periods=1, center=True).max().to_numpy() + return x + + ArrayIR = pd.DataFrame(ir) + ArrayRed = pd.DataFrame(red) + + # calculate ac/dc ir + max_ir = movmax1(ArrayIR, fs) + # print(f"max_ir: {max_ir}") + min_ir = movmin1(ArrayIR, fs) + # print(f"min_ir: {min_ir}") + baseline_data_ir = (max_ir + min_ir) / 2 + # print(f"baseline_data_ir: {baseline_data_ir}") + acDivDcIr = (max_ir - min_ir) / baseline_data_ir + + # calculate ac/dc red + max_red = movmax1(ArrayRed, fs) + min_red = movmin1(ArrayRed, fs) + baseline_data_red = (max_red + min_red) / 2 + acDivDcRed = (max_red - min_red) / baseline_data_red + + # Plot SPO2 = 110-25*(ac/dc_red)/(ac/dc_ir) + SPO2 = 110 - 25 * (acDivDcRed / acDivDcIr) + # plt.figure("SPO2") + timestamp = np.linspace(0, len(red) / fs, len(red)) + plt.figure(figsize=(10, 5)) + ax1 = plt.subplot(311) + plt.plot(timestamp, red, label='Red Signal', color='red', alpha=0.5) + plt.plot(timestamp, ir, label='IR Signal', color='blue', alpha=0.5) + plt.title('Raw PPG Signals') + plt.xlabel("seconds") + plt.ylabel('Amplitude') + plt.legend() + + plt.subplot(312, sharex=ax1) + plt.plot(timestamp, red - baseline_data_red, label='Detrended Red Signal', color='red', alpha=0.5) + plt.plot(timestamp, ir - baseline_data_ir, label='Detrended IR Signal', color='blue', alpha=0.5) + plt.title('Detrended PPG Signals') + plt.xlabel("seconds") + plt.ylabel('Amplitude') + plt.legend() + + plt.subplot(313, sharex=ax1) + plt.plot(timestamp,acDivDcRed, label='AC/DC Red', color='red', alpha=0.5) + plt.plot(timestamp,acDivDcIr, label='AC/DC IR', color='blue', alpha=0.5) + plt.title('AC/DC Ratios') + plt.xlabel("seconds") + plt.ylabel('Ratio') + plt.legend() + plt.show() + plt.xlabel("Samples") + plt.ylabel("SPO2") + plt.title("SPO2") + plt.plot(timestamp, SPO2) + plt.show() + + + + + diff --git a/process_spo2_alg_info.py b/process_spo2_alg_info.py new file mode 100644 index 0000000..6d323e8 --- /dev/null +++ b/process_spo2_alg_info.py @@ -0,0 +1,100 @@ +import re +import pandas as pd +from pathlib import Path +from typing import List, Dict + + +def parse_line(line: str) -> Dict: + """ + 解析一行数据,返回一个字典 + 例如输入: + [300]心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1 R组:;均方根:0; 仪器:60 +[325]心率:75,78,83,83,;标准差1:38.6641;标准差2:3.4187;均方根:80.3232;值:81;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:2 R组:;均方根:0; 仪器:60 +[350]心率:78,83,83,83,;标准差1:46.0893;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60 +[375]心率:83,83,83,107,;标准差1:43.8422;标准差2:10.3923;均方根:90.1047;值:82;信任级别:0 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60 +[400]心率:83,83,107,107,;标准差1:47.7501;标准差2:12;均方根:96.2549;值:82;信任级别:0 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60 + """ + line = line.strip() + if not line: + return None + + # 提取时间戳 [300] 这样的 + time_match = re.match(r'\[(\d+)\]', line) + if not time_match: + return None + timestamp = int(time_match.group(1)) + + # 去掉时间戳部分,后面全部是键值对 + content = line[time_match.end():] + content = content.split(" ") + + result = {"timestamp": timestamp} + + for block in content: + parsed_block = parse_block(block) + result.update(parsed_block) + return result + + +def parse_block(block: str) -> Dict: + """ + 解析一个数据块,返回一个字典 + 例如输入: + 心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1 + 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1 + R组:;均方根:0; + 仪器:60 + """ + data = {} + parts = block.split(';') + if len(parts) == 1: + key, value = parts[0].split(':', 1) + data[key.strip()] = value.strip() + else: + key_0 = parts[0].split(':', 1)[0].strip() + for part in parts: + if len(part.strip()) == 0: + continue + if "结果延迟" in part: + part0, key1, value1 = part.partition('结果延迟:') + data[f"{key_0}_结果延迟"] = value1.strip() + key2, _, value2 = part0.partition(':') + if key2 and value2: + data[f"{key_0}_{key2.strip()}"] = value2.strip() + else: + key, value = part.split(':', 1) + data[f"{key_0}_{key.strip()}"] = value.strip() + + return data + + + +def parse_dat_file(file_path) -> pd.DataFrame: + """ + 读取整个 .dat 文件并返回 pandas DataFrame + """ + data = [] + with open(file_path, 'r', encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + parsed = parse_line(line) + if parsed: + data.append(parsed) + else: + if line.strip(): + print(f"第 {line_num} 行解析失败,已跳过: {line.strip()[:80]}...") + + df = pd.DataFrame(data) + + # 按时间戳排序(一般已经是顺序,但保险起见) + if not df.empty: + df = df.sort_values("timestamp").reset_index(drop=True) + + return df + +if __name__ == '__main__': + # file_path = "./data/spo2_alg_info[42-A3-C5-2F-F7-32]2025.11.17_17.01.14.dat" + file_path = "./data/spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.dat" + file_path = Path(file_path) + df = parse_dat_file(file_path) + + df.to_csv(file_path.with_suffix('.csv'), index=False, encoding='utf-8-sig') \ No newline at end of file diff --git a/process_spo2_hr.py b/process_spo2_hr.py new file mode 100644 index 0000000..36d1dc4 --- /dev/null +++ b/process_spo2_hr.py @@ -0,0 +1,52 @@ +'''样例数据 +red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169 +red:9185136;ied:10744763;accX:209;accY:-438;accZ:-171 +red:9182875;ied:10744312;accX:209;accY:-438;accZ:-171 +red:9181788;ied:10744212;accX:209;accY:-441;accZ:-163 +red:9180729;ied:10744341;accX:209;accY:-441;accZ:-163 +red:9179405;ied:10742269;accX:212;accY:-440;accZ:-172 +red:9178563;ied:10741796;accX:212;accY:-440;accZ:-172 +red:9178557;ied:10742843;accX:209;accY:-440;accZ:-166 +red:9178185;ied:10742777;accX:209;accY:-440;accZ:-166 +red:9177053;ied:10737766;accX:210;accY:-440;accZ:-166 +''' + +import numpy as np +import pandas as pd +from pathlib import Path +def parse_dat_line(line: str) -> dict: + """ + 解析一行 .dat 文件内容,返回一个字典 + 例如输入: + red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169 + """ + data = {} + parts = line.strip().split(';') + for part in parts: + if len(part.strip()) == 0: + continue + key, value = part.split(':', 1) + data[key.strip()] = int(value.strip()) + return data +def parse_dat_file(file_path) -> pd.DataFrame: + """ + 读取整个 .dat 文件并返回 pandas DataFrame + :param file_path: + :return: + """ + records = [] + with open(file_path, 'r', encoding='utf-8') as f: + for line in f: + parsed_line = parse_dat_line(line) + records.append(parsed_line) + df = pd.DataFrame(records) + return df + +if __name__ == '__main__': + # file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.dat' + file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.dat' + file_path = Path(file_path) + df = parse_dat_file(file_path) + + + df.to_csv(file_path.with_suffix('.csv'), index=False) \ No newline at end of file diff --git a/spo2_pipeline.py b/spo2_pipeline.py new file mode 100644 index 0000000..a036c46 --- /dev/null +++ b/spo2_pipeline.py @@ -0,0 +1,114 @@ + +import numpy as np +import pandas as pd +from scipy.signal import butter, filtfilt, find_peaks + +def bandpass_filter(x, fs, low=0.5, high=5.0, order=3): + nyq = 0.5 * fs + lown = low / nyq + highn = high / nyq + b, a = butter(order, [lown, highn], btype='band') + y = filtfilt(b, a, x) + return y + +def detrend(x): + return x - np.mean(x) + +def detect_peaks(signal, fs, hr_min=40, hr_max=220): + min_dist = int(np.floor(fs * 60.0 / hr_max)) + prominence = 0.3 * np.std(signal) + peaks, props = find_peaks(signal, distance=min_dist, prominence=prominence) + return peaks, props + +def compute_ac_dc_per_beat(signal, peaks, fs): + n = len(signal) + AC, DC, times, beat_windows = [], [], [], [] + for i, p in enumerate(peaks): + if i == 0: + start = 0 + else: + start = (peaks[i-1] + p) // 2 + if i == len(peaks)-1: + end = n-1 + else: + end = (p + peaks[i+1]) // 2 + segment = signal[start:end+1] + if segment.size < 3: + AC.append(np.nan); DC.append(np.nan) + times.append(p / fs) + beat_windows.append((start, end)) + continue + seg_max = np.max(segment) + seg_min = np.min(segment) + ac = (seg_max - seg_min) / 2.0 + dc = np.mean(segment) + AC.append(ac); DC.append(dc) + times.append(p / fs) + beat_windows.append((start, end)) + return np.array(AC), np.array(DC), np.array(times), beat_windows + +def compute_spo2_from_R(R, A=104.0, B=17.0): + return A - B * R + +def calibrate_linear(R_vals, spo2_ref): + mask = ~np.isnan(R_vals) & ~np.isnan(spo2_ref) + if mask.sum() < 2: + return None + coeffs = np.polyfit(R_vals[mask], spo2_ref[mask], 1) + c1, c0 = coeffs[0], coeffs[1] + A = c0; B = -c1 + return float(A), float(B) + +def spo2_pipeline(red, ir, fs=25, calibrate_with_ref=None, A=104.0, B=17.0): + assert red.shape == ir.shape, "red and ir must have same shape" + red_f = bandpass_filter(red, fs) + ir_f = bandpass_filter(ir, fs) + red_d = detrend(red_f) + ir_d = detrend(ir_f) + peaks, props = detect_peaks(ir_d, fs) + ac_red, dc_red, times, _ = compute_ac_dc_per_beat(red_d, peaks, fs) + ac_ir, dc_ir, _, _ = compute_ac_dc_per_beat(ir_d, peaks, fs) + + with np.errstate(divide='ignore', invalid='ignore'): + ratio_red = ac_red / dc_red + ratio_ir = ac_ir / dc_ir + R = ratio_red / ratio_ir + + spo2_vals = compute_spo2_from_R(R, A=A, B=B) + used_A, used_B = A, B + + # if calibrate_with_ref is not None: + # if len(calibrate_with_ref) != len(R): + # raise ValueError("Reference SpO2 length mismatch with beats.") + # ref = np.array(calibrate_with_ref) + # fit = calibrate_linear(R, ref) + # if fit is not None: + # used_A, used_B = fit + # spo2_vals = compute_spo2_from_R(R, A=used_A, B=used_B) + + df = pd.DataFrame({ + "time_s": times, + "AC_red": ac_red, + "DC_red": dc_red, + "AC_ir": ac_ir, + "DC_ir": dc_ir, + "R": R, + "SpO2": spo2_vals + }) + + df['valid'] = ( + (~np.isnan(df['R'])) & + (df['AC_red']>0) & (df['AC_ir']>0) & + (df['DC_red']>0) & (df['DC_ir']>0) & + (df['SpO2']>50) & (df['SpO2']<100) + ) + + summary = { + "n_beats": len(df), + "n_valid_beats": int(df['valid'].sum()), + "mean_spo2": float(np.nanmean(df['SpO2'][df['valid']])) if df['valid'].any() else float('nan'), + "median_spo2": float(np.nanmedian(df['SpO2'][df['valid']])) if df['valid'].any() else float('nan'), + "used_A": used_A, + "used_B": used_B + } + return df, summary