Add PPG signal processing and SpO2 calculation modules

This commit is contained in:
marques 2025-11-19 16:27:04 +08:00
commit 667bdc8213
5 changed files with 623 additions and 0 deletions

114
PPG2SpO2.py Normal file
View File

@ -0,0 +1,114 @@
from pathlib import Path
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
os.environ["DISPLAY"] = "localhost:10.0"
from func import calculate_hr_spo2_zhihu, ppg2spo2_pipeline, func_1
from spo2_pipeline import spo2_pipeline
def read_data(file_path):
"""
Read data from a file and return it as a list of floats.
Args:
file_path (str or Path): The path to the file to read.
Returns:
list of float: The data read from the file.
"""
df = pd.read_csv(file_path)
# red = df['red'].values[300:]
# ir = df['ied'].values[300:]
red = df['red'].values[50:]
ir = df['ied'].values[50:]
return red, ir
def read_labels(file_path):
"""
Read labels from a file and return them as a list of integers.
Args:
file_path (str or Path): The path to the file to read.
Returns:
list of int: The labels read from the file.
"""
df = pd.read_csv(file_path)
timestamp = df['timestamp'].values
hr_label = df['心率_值'].values
spo2_label = df['血氧组_值'].values.astype(float)
r_value = df['R组_均方根'].values
np.place(spo2_label, spo2_label == 0, np.nan)
return timestamp, hr_label, spo2_label, r_value
def draw_compare(red_signal, ir_signal, spo2_values, hr_values, r_values, fs=25):
"""
Draw comparison plots for red and IR signals along with SpO2 values.
Args:
red_signal (array-like): The red light signal data.
ir_signal (array-like): The infrared light signal data.
spo2_values (array-like): The computed SpO2 values.
fs (int): Sampling frequency in Hz. Default is 25.
"""
time_axis = [i / fs for i in range(len(red_signal))]
plt.figure(figsize=(15, 10))
plt.subplot(3, 1, 1)
plt.plot(time_axis, red_signal, label='Red Signal', color='red')
plt.plot(time_axis, ir_signal, label='IR Signal', color='blue')
plt.title('Red and IR Signals')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.legend()
plt.subplot(3, 1, 2)
plt.plot(spo2_values, label='Computed SpO2', color='green')
plt.title('Computed SpO2 Values')
plt.xlabel('Beats')
plt.ylabel('SpO2 (%)')
plt.legend()
plt.subplot(3, 1, 3)
plt.plot(hr_values, label='Computed Heart Rate', color='orange')
plt.title('Computed Heart Rate Values')
plt.xlabel('Beats')
plt.ylabel('Heart Rate (bpm)')
plt.legend()
plt.tight_layout()
plt.show()
if __name__ == '__main__':
file_path = Path("./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.csv")
# file_path = "./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.csv"
red_signal, ir_signal = read_data(file_path)
label_path = file_path.parent / "spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.csv"
timestamp, hr_label, spo2_label, r_value = read_labels(label_path)
# draw_compare(red_signal, ir_signal, spo2_label, hr_label, r_value, fs=25)
# df, summary = spo2_pipeline(
# red_signal,
# ir_signal,
# fs=25,
# calibrate_with_ref=spo2_label,
# A=104.0,
# B=17.0
# )
# print(summary)
# print(df)
# HR_result, SpO2_result = ppg2spo2_pipeline(red_signal, ir_signal, fs=25)
func_1(red_signal, ir_signal, fs=25)

243
func.py Normal file
View File

@ -0,0 +1,243 @@
import numpy as np
import pandas as pd
from scipy import signal
from matplotlib import pyplot as plt
def calculate_hr_spo2_zhihu(X, fs, FFT_size=512):
"""
X : ndarray, shape (N, 2) # 第0列 RED第1列 IR
fs : 采样率 (Hz)
FFT_size : FFT 点数建议 2 的幂
返回值 : (Heart_Rate, SpO2_Level) 已经取整
https://zhuanlan.zhihu.com/p/658858641
https://github.com/thinkng/ppgprocessing/blob/master/HR_SpO2_Estimation.m
"""
X = np.asarray(X)
if X.ndim != 2 or X.shape[1] != 2:
raise ValueError("X 必须是 (样本数, 2) 的数组列0=RED列1=IR")
# 计算能处理的完整窗口数量(每个窗口长度 = FFT_size
step = fs # MATLAB 中是 n*fs 开始,步长为 fs1 秒)
n_windows = int((len(X) / (2 * fs)) - 2) # 与原 MATLAB 一致
HEART_RATE = np.zeros(n_windows)
SpO2 = np.zeros(n_windows)
for n in range(n_windows):
start_idx = n * step
end_idx = start_idx + FFT_size
y1 = X[start_idx:end_idx, 0] # RED
y2 = X[start_idx:end_idx, 1] # IR
# ---------- FFT RED ----------
Y1 = np.fft.fft(y1, n=FFT_size)
Y1_abs = np.abs(Y1[:FFT_size//2 + 1])
f1 = fs / 2 * np.linspace(0, 1, FFT_size//2 + 1)
# ---------- FFT IR ----------
Y2 = np.fft.fft(y2, n=FFT_size)
Y2_abs = np.abs(Y2[:FFT_size//2 + 1])
f2 = f1.copy() # 与 f1 完全相同
# ---------- 在 0.5~2.5 Hz对应心率 30~150 bpm范围内找局部最大 ----------
# MATLAB 中索引 6:12 对应频率大约 0.6~1.4 Hz取决于 FFT_size/fs
# 这里统一取频率 0.5~2.5 Hz 对应的索引
idx_range = np.where((f1 >= 0.5) & (f1 <= 2.5))[0]
# RED 峰值索引
segment_red = Y1_abs[idx_range]
local_max_i = np.argmax(segment_red)
pk_RED_i = idx_range[local_max_i]
# IR 峰值索引
segment_ir = Y2_abs[idx_range]
local_max_i = np.argmax(segment_ir)
pk_IR_i = idx_range[local_max_i]
# ---------- 心率 ----------
heart_rate_bpm = f2[pk_IR_i] * 60
HEART_RATE[n] = heart_rate_bpm
# ---------- SpO2 ----------
R_RED = Y1_abs[pk_RED_i] / (Y1_abs[0] + 1e-12) # 防止除以 0
R_IR = Y2_abs[pk_IR_i] / (Y2_abs[0] + 1e-12)
R = R_RED / R_IR
spo2 = 104 - 28 * R
SpO2[n] = spo2
# 去掉首尾(与原 MATLAB 相同)
if len(HEART_RATE) > 2:
HR_mean = np.mean(HEART_RATE[1:-1])
SpO2_mean = np.mean(SpO2[1:-1])
else:
HR_mean = np.mean(HEART_RATE)
SpO2_mean = np.mean(SpO2)
Heart_Rate = round(HR_mean)
SpO2_Level = round(SpO2_mean)
return Heart_Rate, SpO2_Level
def _culculate_spo2(ir_list_data, red_list_data):
ir_dc = min(ir_list_data)
red_dc = min(red_list_data)
ir_ac = max(ir_list_data) - ir_dc
red_ac = max(red_list_data) - red_dc
temp1 = ir_ac * red_dc
if temp1 < 1:
temp1 = 1
R2 = (red_ac * ir_dc) / temp1
SPO2 = -45.060 * R2 * R2 + 30.354 * R2 + 94.845
if SPO2 > 100 or SPO2 < 0:
SPO2 = 0
return SPO2
def _culculate_HR(ir_list_data_filtered, data_list_time):
HR_num = signal.find_peaks(ir_list_data_filtered, distance=10)[0]
time = data_list_time[-1] -data_list_time[0]
HR = len(HR_num) / (time / 1000) * 60
return HR
def process_signal(signal_segment, fs, highpass=True):
if highpass:
h_b, h_a = signal.butter(N=8, Wn=1/(fs/2), btype="highpass", output="ba")
data = signal.filtfilt(h_b, h_a, signal_segment, axis=0)
else:
data = signal_segment
data = signal.detrend(data,
axis=0,
type='linear',
bp=0,
overwrite_data=False)
return data
def ppg2spo2_pipeline(red, ir, fs=25):
"""
采用滑窗分析法计算心率和血氧饱和度
每秒中输出结果
red : ndarray, 红光信号
ir : ndarray, 红外光信号
fs : 采样率 (Hz)
"""
red = np.asarray(red).reshape(-1)
ir = np.asarray(ir).reshape(-1)
if len(red) != len(ir):
raise ValueError("红光和红外光信号长度必须相同")
red_filtered = process_signal(red, fs, highpass=False)
ir_filtered = process_signal(ir, fs, highpass=True)
bpm_list_data = []
spo2_list_data = []
temp_bpm_list_data = []
temp_spo2_list_data = []
for i in range(len(red)//fs - 1):
red_segment = red_filtered[i*fs:(i+1)*fs]
ir_segment = ir_filtered[i*fs:(i+1)*fs]
spo2 = _culculate_spo2(ir_segment, red_segment)
bpm = _culculate_HR(ir_segment, np.arange(i*fs, (i+1)*fs) * (1000/fs))
temp_bpm_list_data.append(bpm)
temp_spo2_list_data.append(spo2)
# matlab
# python
plt.figure(figsize=(10, 5))
timestamp = np.linspace(0, len(red_filtered)/fs, len(red_filtered))
ax1 = plt.subplot(211)
plt.plot( timestamp, red, label='Red Signal', color='red', alpha=0.5)
plt.plot(timestamp,ir, label='IR Signal', color='blue', alpha=0.5)
plt.title('Raw PPG Signals')
plt.xlabel("seconds")
plt.ylabel('Amplitude')
plt.legend()
plt.subplot(212, sharex=ax1)
plt.plot(timestamp,red_filtered, label='Filtered Red Signal', color='red', alpha=0.5)
plt.plot(timestamp,ir_filtered, label='Filtered IR Signal', color='blue', alpha=0.5)
plt.title('Filtered PPG Signals')
plt.xlabel("seconds")
plt.ylabel('Amplitude')
plt.legend()
plt.show()
def func_1(red, ir, fs=25):
# Processing_PPG_Signal
# make a move window find min and max of ArrayIR
def movmin1(A, k):
x = A.rolling(k, min_periods=1, center=True).min().to_numpy() #
return x
def movmax1(A, k):
x = A.rolling(k, min_periods=1, center=True).max().to_numpy()
return x
ArrayIR = pd.DataFrame(ir)
ArrayRed = pd.DataFrame(red)
# calculate ac/dc ir
max_ir = movmax1(ArrayIR, fs)
# print(f"max_ir: {max_ir}")
min_ir = movmin1(ArrayIR, fs)
# print(f"min_ir: {min_ir}")
baseline_data_ir = (max_ir + min_ir) / 2
# print(f"baseline_data_ir: {baseline_data_ir}")
acDivDcIr = (max_ir - min_ir) / baseline_data_ir
# calculate ac/dc red
max_red = movmax1(ArrayRed, fs)
min_red = movmin1(ArrayRed, fs)
baseline_data_red = (max_red + min_red) / 2
acDivDcRed = (max_red - min_red) / baseline_data_red
# Plot SPO2 = 110-25*(ac/dc_red)/(ac/dc_ir)
SPO2 = 110 - 25 * (acDivDcRed / acDivDcIr)
# plt.figure("SPO2")
timestamp = np.linspace(0, len(red) / fs, len(red))
plt.figure(figsize=(10, 5))
ax1 = plt.subplot(311)
plt.plot(timestamp, red, label='Red Signal', color='red', alpha=0.5)
plt.plot(timestamp, ir, label='IR Signal', color='blue', alpha=0.5)
plt.title('Raw PPG Signals')
plt.xlabel("seconds")
plt.ylabel('Amplitude')
plt.legend()
plt.subplot(312, sharex=ax1)
plt.plot(timestamp, red - baseline_data_red, label='Detrended Red Signal', color='red', alpha=0.5)
plt.plot(timestamp, ir - baseline_data_ir, label='Detrended IR Signal', color='blue', alpha=0.5)
plt.title('Detrended PPG Signals')
plt.xlabel("seconds")
plt.ylabel('Amplitude')
plt.legend()
plt.subplot(313, sharex=ax1)
plt.plot(timestamp,acDivDcRed, label='AC/DC Red', color='red', alpha=0.5)
plt.plot(timestamp,acDivDcIr, label='AC/DC IR', color='blue', alpha=0.5)
plt.title('AC/DC Ratios')
plt.xlabel("seconds")
plt.ylabel('Ratio')
plt.legend()
plt.show()
plt.xlabel("Samples")
plt.ylabel("SPO2")
plt.title("SPO2")
plt.plot(timestamp, SPO2)
plt.show()

100
process_spo2_alg_info.py Normal file
View File

@ -0,0 +1,100 @@
import re
import pandas as pd
from pathlib import Path
from typing import List, Dict
def parse_line(line: str) -> Dict:
"""
解析一行数据返回一个字典
例如输入:
[300]心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;:82;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;:0;信任级别:0结果延迟:1 R组:;均方根:0; 仪器:60
[325]心率:75,78,83,83,;标准差1:38.6641;标准差2:3.4187;均方根:80.3232;:81;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;:0;信任级别:0结果延迟:2 R组:;均方根:0; 仪器:60
[350]心率:78,83,83,83,;标准差1:46.0893;标准差2:2.16506;均方根:82.2787;:82;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60
[375]心率:83,83,83,107,;标准差1:43.8422;标准差2:10.3923;均方根:90.1047;:82;信任级别:0 血氧组:;标准差1:0;标准差:0;均方根:0;:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60
[400]心率:83,83,107,107,;标准差1:47.7501;标准差2:12;均方根:96.2549;:82;信任级别:0 血氧组:;标准差1:0;标准差:0;均方根:0;:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60
"""
line = line.strip()
if not line:
return None
# 提取时间戳 [300] 这样的
time_match = re.match(r'\[(\d+)\]', line)
if not time_match:
return None
timestamp = int(time_match.group(1))
# 去掉时间戳部分,后面全部是键值对
content = line[time_match.end():]
content = content.split(" ")
result = {"timestamp": timestamp}
for block in content:
parsed_block = parse_block(block)
result.update(parsed_block)
return result
def parse_block(block: str) -> Dict:
"""
解析一个数据块返回一个字典
例如输入:
心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;:82;信任级别:1
血氧组:;标准差1:0;标准差:0;均方根:0;:0;信任级别:0结果延迟:1
R组:;均方根:0;
仪器:60
"""
data = {}
parts = block.split(';')
if len(parts) == 1:
key, value = parts[0].split(':', 1)
data[key.strip()] = value.strip()
else:
key_0 = parts[0].split(':', 1)[0].strip()
for part in parts:
if len(part.strip()) == 0:
continue
if "结果延迟" in part:
part0, key1, value1 = part.partition('结果延迟:')
data[f"{key_0}_结果延迟"] = value1.strip()
key2, _, value2 = part0.partition(':')
if key2 and value2:
data[f"{key_0}_{key2.strip()}"] = value2.strip()
else:
key, value = part.split(':', 1)
data[f"{key_0}_{key.strip()}"] = value.strip()
return data
def parse_dat_file(file_path) -> pd.DataFrame:
"""
读取整个 .dat 文件并返回 pandas DataFrame
"""
data = []
with open(file_path, 'r', encoding='utf-8') as f:
for line_num, line in enumerate(f, 1):
parsed = parse_line(line)
if parsed:
data.append(parsed)
else:
if line.strip():
print(f"{line_num} 行解析失败,已跳过: {line.strip()[:80]}...")
df = pd.DataFrame(data)
# 按时间戳排序(一般已经是顺序,但保险起见)
if not df.empty:
df = df.sort_values("timestamp").reset_index(drop=True)
return df
if __name__ == '__main__':
# file_path = "./data/spo2_alg_info[42-A3-C5-2F-F7-32]2025.11.17_17.01.14.dat"
file_path = "./data/spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.dat"
file_path = Path(file_path)
df = parse_dat_file(file_path)
df.to_csv(file_path.with_suffix('.csv'), index=False, encoding='utf-8-sig')

52
process_spo2_hr.py Normal file
View File

@ -0,0 +1,52 @@
'''样例数据
red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169
red:9185136;ied:10744763;accX:209;accY:-438;accZ:-171
red:9182875;ied:10744312;accX:209;accY:-438;accZ:-171
red:9181788;ied:10744212;accX:209;accY:-441;accZ:-163
red:9180729;ied:10744341;accX:209;accY:-441;accZ:-163
red:9179405;ied:10742269;accX:212;accY:-440;accZ:-172
red:9178563;ied:10741796;accX:212;accY:-440;accZ:-172
red:9178557;ied:10742843;accX:209;accY:-440;accZ:-166
red:9178185;ied:10742777;accX:209;accY:-440;accZ:-166
red:9177053;ied:10737766;accX:210;accY:-440;accZ:-166
'''
import numpy as np
import pandas as pd
from pathlib import Path
def parse_dat_line(line: str) -> dict:
"""
解析一行 .dat 文件内容返回一个字典
例如输入:
red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169
"""
data = {}
parts = line.strip().split(';')
for part in parts:
if len(part.strip()) == 0:
continue
key, value = part.split(':', 1)
data[key.strip()] = int(value.strip())
return data
def parse_dat_file(file_path) -> pd.DataFrame:
"""
读取整个 .dat 文件并返回 pandas DataFrame
:param file_path:
:return:
"""
records = []
with open(file_path, 'r', encoding='utf-8') as f:
for line in f:
parsed_line = parse_dat_line(line)
records.append(parsed_line)
df = pd.DataFrame(records)
return df
if __name__ == '__main__':
# file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.dat'
file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.dat'
file_path = Path(file_path)
df = parse_dat_file(file_path)
df.to_csv(file_path.with_suffix('.csv'), index=False)

114
spo2_pipeline.py Normal file
View File

@ -0,0 +1,114 @@
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt, find_peaks
def bandpass_filter(x, fs, low=0.5, high=5.0, order=3):
nyq = 0.5 * fs
lown = low / nyq
highn = high / nyq
b, a = butter(order, [lown, highn], btype='band')
y = filtfilt(b, a, x)
return y
def detrend(x):
return x - np.mean(x)
def detect_peaks(signal, fs, hr_min=40, hr_max=220):
min_dist = int(np.floor(fs * 60.0 / hr_max))
prominence = 0.3 * np.std(signal)
peaks, props = find_peaks(signal, distance=min_dist, prominence=prominence)
return peaks, props
def compute_ac_dc_per_beat(signal, peaks, fs):
n = len(signal)
AC, DC, times, beat_windows = [], [], [], []
for i, p in enumerate(peaks):
if i == 0:
start = 0
else:
start = (peaks[i-1] + p) // 2
if i == len(peaks)-1:
end = n-1
else:
end = (p + peaks[i+1]) // 2
segment = signal[start:end+1]
if segment.size < 3:
AC.append(np.nan); DC.append(np.nan)
times.append(p / fs)
beat_windows.append((start, end))
continue
seg_max = np.max(segment)
seg_min = np.min(segment)
ac = (seg_max - seg_min) / 2.0
dc = np.mean(segment)
AC.append(ac); DC.append(dc)
times.append(p / fs)
beat_windows.append((start, end))
return np.array(AC), np.array(DC), np.array(times), beat_windows
def compute_spo2_from_R(R, A=104.0, B=17.0):
return A - B * R
def calibrate_linear(R_vals, spo2_ref):
mask = ~np.isnan(R_vals) & ~np.isnan(spo2_ref)
if mask.sum() < 2:
return None
coeffs = np.polyfit(R_vals[mask], spo2_ref[mask], 1)
c1, c0 = coeffs[0], coeffs[1]
A = c0; B = -c1
return float(A), float(B)
def spo2_pipeline(red, ir, fs=25, calibrate_with_ref=None, A=104.0, B=17.0):
assert red.shape == ir.shape, "red and ir must have same shape"
red_f = bandpass_filter(red, fs)
ir_f = bandpass_filter(ir, fs)
red_d = detrend(red_f)
ir_d = detrend(ir_f)
peaks, props = detect_peaks(ir_d, fs)
ac_red, dc_red, times, _ = compute_ac_dc_per_beat(red_d, peaks, fs)
ac_ir, dc_ir, _, _ = compute_ac_dc_per_beat(ir_d, peaks, fs)
with np.errstate(divide='ignore', invalid='ignore'):
ratio_red = ac_red / dc_red
ratio_ir = ac_ir / dc_ir
R = ratio_red / ratio_ir
spo2_vals = compute_spo2_from_R(R, A=A, B=B)
used_A, used_B = A, B
# if calibrate_with_ref is not None:
# if len(calibrate_with_ref) != len(R):
# raise ValueError("Reference SpO2 length mismatch with beats.")
# ref = np.array(calibrate_with_ref)
# fit = calibrate_linear(R, ref)
# if fit is not None:
# used_A, used_B = fit
# spo2_vals = compute_spo2_from_R(R, A=used_A, B=used_B)
df = pd.DataFrame({
"time_s": times,
"AC_red": ac_red,
"DC_red": dc_red,
"AC_ir": ac_ir,
"DC_ir": dc_ir,
"R": R,
"SpO2": spo2_vals
})
df['valid'] = (
(~np.isnan(df['R'])) &
(df['AC_red']>0) & (df['AC_ir']>0) &
(df['DC_red']>0) & (df['DC_ir']>0) &
(df['SpO2']>50) & (df['SpO2']<100)
)
summary = {
"n_beats": len(df),
"n_valid_beats": int(df['valid'].sum()),
"mean_spo2": float(np.nanmean(df['SpO2'][df['valid']])) if df['valid'].any() else float('nan'),
"median_spo2": float(np.nanmedian(df['SpO2'][df['valid']])) if df['valid'].any() else float('nan'),
"used_A": used_A,
"used_B": used_B
}
return df, summary