Add PPG signal processing and SpO2 calculation modules
This commit is contained in:
commit
667bdc8213
114
PPG2SpO2.py
Normal file
114
PPG2SpO2.py
Normal file
@ -0,0 +1,114 @@
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
import os
|
||||
os.environ["DISPLAY"] = "localhost:10.0"
|
||||
from func import calculate_hr_spo2_zhihu, ppg2spo2_pipeline, func_1
|
||||
from spo2_pipeline import spo2_pipeline
|
||||
|
||||
def read_data(file_path, skip=50):
    """
    Read the raw red / infrared PPG channels from a CSV file.

    Args:
        file_path (str or Path): CSV file containing a ``red`` column and an
            ``ied`` column (the infrared channel as it is named in the
            recorded data).
        skip (int): Number of leading samples to discard. Default is 50.
            (Presumably the start-up transient of the sensor; earlier
            experiments used 300.)

    Returns:
        tuple of numpy.ndarray: ``(red, ir)`` sample arrays with the first
        ``skip`` samples removed.
    """
    df = pd.read_csv(file_path)
    red = df['red'].values[skip:]
    ir = df['ied'].values[skip:]
    return red, ir
|
||||
|
||||
|
||||
def read_labels(file_path):
    """
    Read the reference (label) columns from an algorithm-info CSV file.

    Args:
        file_path (str or Path): CSV file with the columns ``timestamp``,
            ``心率_值``, ``血氧组_值`` and ``R组_均方根``.

    Returns:
        tuple: ``(timestamp, hr_label, spo2_label, r_value)`` as numpy arrays.
        ``spo2_label`` is a float array in which every 0 entry has been
        replaced by NaN.
    """
    table = pd.read_csv(file_path)

    # astype(float) yields a fresh array, so the masking below does not
    # touch the DataFrame.
    spo2_label = table['血氧组_值'].values.astype(float)
    spo2_label[spo2_label == 0] = np.nan

    return (
        table['timestamp'].values,
        table['心率_值'].values,
        spo2_label,
        table['R组_均方根'].values,
    )
|
||||
|
||||
|
||||
def draw_compare(red_signal, ir_signal, spo2_values, hr_values, r_values, fs=25):
    """
    Show a three-panel figure: raw signals, SpO2 values and heart-rate values.

    Args:
        red_signal (array-like): The red light signal data.
        ir_signal (array-like): The infrared light signal data.
        spo2_values (array-like): SpO2 values, plotted against their index.
        hr_values (array-like): Heart-rate values, plotted against their index.
        r_values (array-like): Accepted but not used by this function.
        fs (int): Sampling frequency in Hz. Default is 25.
    """
    seconds = [idx / fs for idx, _ in enumerate(red_signal)]

    plt.figure(figsize=(15, 10))

    # Top panel: both raw channels on a shared time axis.
    plt.subplot(3, 1, 1)
    raw_traces = (
        (red_signal, 'Red Signal', 'red'),
        (ir_signal, 'IR Signal', 'blue'),
    )
    for samples, name, colour in raw_traces:
        plt.plot(seconds, samples, label=name, color=colour)
    plt.title('Red and IR Signals')
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.legend()

    # Lower panels: one value series each, plotted against its index.
    panels = (
        (2, spo2_values, 'Computed SpO2', 'green',
         'Computed SpO2 Values', 'SpO2 (%)'),
        (3, hr_values, 'Computed Heart Rate', 'orange',
         'Computed Heart Rate Values', 'Heart Rate (bpm)'),
    )
    for row, values, name, colour, title, ylabel in panels:
        plt.subplot(3, 1, row)
        plt.plot(values, label=name, color=colour)
        plt.title(title)
        plt.xlabel('Beats')
        plt.ylabel(ylabel)
        plt.legend()

    plt.tight_layout()
    plt.show()
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Raw PPG recording to analyse.
    file_path = Path("./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.csv")
    # file_path = "./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.csv"
    red_signal, ir_signal = read_data(file_path)

    # Reference values live in the same directory as the recording.
    label_name = "spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.csv"
    label_path = file_path.parent / label_name
    timestamp, hr_label, spo2_label, r_value = read_labels(label_path)

    # Alternative experiments, currently disabled:
    # draw_compare(red_signal, ir_signal, spo2_label, hr_label, r_value, fs=25)
    # df, summary = spo2_pipeline(
    #     red_signal,
    #     ir_signal,
    #     fs=25,
    #     calibrate_with_ref=spo2_label,
    #     A=104.0,
    #     B=17.0
    # )
    # print(summary)
    # print(df)
    # HR_result, SpO2_result = ppg2spo2_pipeline(red_signal, ir_signal, fs=25)

    func_1(red_signal, ir_signal, fs=25)
|
||||
|
||||
|
||||
|
||||
243
func.py
Normal file
243
func.py
Normal file
@ -0,0 +1,243 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import signal
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
def calculate_hr_spo2_zhihu(X, fs, FFT_size=512):
    """
    Estimate heart rate and SpO2 from a two-channel PPG recording via FFT.

    Windows of ``FFT_size`` samples are taken every ``fs`` samples (one
    second). In each window the spectral peak between 0.5 Hz and 2.5 Hz
    (30-150 bpm) is located for both channels; the IR peak frequency gives the
    heart rate and the peak/DC-bin magnitude ratios give
    ``R = (peak_red / dc_red) / (peak_ir / dc_ir)`` and ``SpO2 = 104 - 28 * R``.

    Port of
    https://github.com/thinkng/ppgprocessing/blob/master/HR_SpO2_Estimation.m
    (see also https://zhuanlan.zhihu.com/p/658858641).

    Args:
        X: array of shape (N, 2); column 0 is RED, column 1 is IR.
        fs: sampling rate in Hz.
        FFT_size: number of FFT points (a power of two is recommended).

    Returns:
        tuple of int: ``(Heart_Rate, SpO2_Level)``, the rounded means over all
        windows (first and last window dropped when more than two exist).

    Raises:
        ValueError: if ``X`` is not (N, 2), if the recording is too short to
            produce a single window (fewer than ``6 * fs`` samples), or if no
            FFT bin falls inside the 0.5-2.5 Hz search band.
    """
    X = np.asarray(X)
    if X.ndim != 2 or X.shape[1] != 2:
        raise ValueError("X 必须是 (样本数, 2) 的数组,列0=RED,列1=IR")

    # Window count formula kept identical to the original MATLAB code.
    n_windows = int((len(X) / (2 * fs)) - 2)
    if n_windows < 1:
        # Previously this surfaced as an obscure ValueError from np.zeros()
        # or round(nan); fail early with an explicit message instead.
        raise ValueError(
            f"X is too short: got {len(X)} samples, need at least {6 * fs} "
            f"for fs={fs}"
        )

    # The frequency axis and the search band are the same for every window.
    half = FFT_size // 2 + 1
    freqs = fs / 2 * np.linspace(0, 1, half)
    band = np.where((freqs >= 0.5) & (freqs <= 2.5))[0]
    if band.size == 0:
        raise ValueError(
            "no FFT bin lies in the 0.5-2.5 Hz band; increase FFT_size"
        )

    heart_rate = np.zeros(n_windows)
    spo2 = np.zeros(n_windows)

    for n in range(n_windows):
        # int(round(...)) keeps slicing valid when fs is not an integer.
        start = int(round(n * fs))
        window = X[start:start + FFT_size]

        # np.fft.fft zero-pads windows that run past the end of the data.
        red_mag = np.abs(np.fft.fft(window[:, 0], n=FFT_size)[:half])
        ir_mag = np.abs(np.fft.fft(window[:, 1], n=FFT_size)[:half])

        pk_red = band[np.argmax(red_mag[band])]
        pk_ir = band[np.argmax(ir_mag[band])]

        heart_rate[n] = freqs[pk_ir] * 60

        # Bin 0 is the DC component; the epsilon guards against division by 0.
        r_red = red_mag[pk_red] / (red_mag[0] + 1e-12)
        r_ir = ir_mag[pk_ir] / (ir_mag[0] + 1e-12)
        spo2[n] = 104 - 28 * (r_red / r_ir)

    # Drop the first and last window (as in the MATLAB original).
    if n_windows > 2:
        hr_mean = np.mean(heart_rate[1:-1])
        spo2_mean = np.mean(spo2[1:-1])
    else:
        hr_mean = np.mean(heart_rate)
        spo2_mean = np.mean(spo2)

    return int(round(float(hr_mean))), int(round(float(spo2_mean)))
|
||||
|
||||
|
||||
|
||||
def _culculate_spo2(ir_list_data, red_list_data):
    """
    Estimate SpO2 for one segment from its min/max values.

    For each channel the minimum is used as the DC level and (max - min) as
    the AC amplitude. The ratio ``R = (red_ac * ir_dc) / (ir_ac * red_dc)`` is
    mapped through ``-45.060 * R^2 + 30.354 * R + 94.845``.

    Args:
        ir_list_data: infrared samples of the segment.
        red_list_data: red samples of the segment.

    Returns:
        The SpO2 estimate, or 0 when the result is above 100 or below 0.
    """
    ir_floor = min(ir_list_data)
    red_floor = min(red_list_data)
    ir_swing = max(ir_list_data) - ir_floor
    red_swing = max(red_list_data) - red_floor

    # Clamp the denominator so a flat IR segment cannot divide by zero.
    denominator = ir_swing * red_floor
    denominator = 1 if denominator < 1 else denominator

    R2 = (red_swing * ir_floor) / denominator
    SPO2 = -45.060 * R2 * R2 + 30.354 * R2 + 94.845

    # Out-of-range estimates are reported as 0.
    return 0 if (SPO2 > 100 or SPO2 < 0) else SPO2
|
||||
|
||||
def _culculate_HR(ir_list_data_filtered, data_list_time):
    """
    Estimate heart rate (bpm) by counting peaks in a filtered IR segment.

    Args:
        ir_list_data_filtered: filtered infrared samples.
        data_list_time: timestamps of the samples in milliseconds; only the
            first and last entries are used.

    Returns:
        Number of detected peaks divided by the segment duration, in beats
        per minute.
    """
    # Peaks must be at least 10 samples apart.
    peak_positions, _ = signal.find_peaks(ir_list_data_filtered, distance=10)
    elapsed_ms = data_list_time[-1] - data_list_time[0]
    return len(peak_positions) / (elapsed_ms / 1000) * 60
|
||||
|
||||
|
||||
def process_signal(signal_segment, fs, highpass=True):
    """
    Optionally high-pass filter a signal, then remove its linear trend.

    Args:
        signal_segment: samples to process; filtering and detrending run
            along axis 0.
        fs: sampling rate in Hz.
        highpass: when True, apply a zero-phase 8th-order Butterworth
            high-pass filter with a 1 Hz cut-off before detrending.

    Returns:
        numpy.ndarray: the processed signal, same shape as the input.
    """
    if highpass:
        # An 8th-order filter in (b, a) polynomial form is numerically
        # fragile; second-order sections are the form SciPy recommends.
        sos = signal.butter(N=8, Wn=1 / (fs / 2), btype="highpass", output="sos")
        data = signal.sosfiltfilt(sos, signal_segment, axis=0)
    else:
        data = signal_segment
    return signal.detrend(data, axis=0, type='linear')
|
||||
|
||||
|
||||
def ppg2spo2_pipeline(red, ir, fs=25):
    """
    Sliding-window heart-rate and SpO2 estimation, one result per second.

    The signals are cut into consecutive one-second windows (``fs`` samples).
    For each window SpO2 is estimated from the raw red/IR samples and heart
    rate from the high-pass filtered IR samples. The raw and filtered signals
    are then plotted.

    Args:
        red: ndarray, red light signal.
        ir: ndarray, infrared light signal.
        fs: sampling rate in Hz (must be an integer; it is used as the window
            length in samples).

    Returns:
        tuple of numpy.ndarray: ``(bpm, spo2)``, one entry per window.

    Raises:
        ValueError: if ``red`` and ``ir`` differ in length.
    """
    red = np.asarray(red).reshape(-1)
    ir = np.asarray(ir).reshape(-1)

    if len(red) != len(ir):
        raise ValueError("红光和红外光信号长度必须相同")

    red_filtered = process_signal(red, fs, highpass=False)
    ir_filtered = process_signal(ir, fs, highpass=True)

    bpm_list_data = []
    spo2_list_data = []

    # NOTE(review): the "- 1" leaves the last full second unprocessed; kept
    # as in the original - confirm whether that is intended.
    for i in range(len(red) // fs - 1):
        window = slice(i * fs, (i + 1) * fs)

        # SpO2 needs the true DC level, which only the raw samples carry.
        # The detrended/high-passed signals have a minimum <= 0, which made
        # the helper clamp its denominator and return 0.
        spo2 = _culculate_spo2(ir[window], red[window])

        # Heart rate is counted on the filtered IR signal; timestamps in ms.
        window_time_ms = np.arange(i * fs, (i + 1) * fs) * (1000 / fs)
        bpm = _culculate_HR(ir_filtered[window], window_time_ms)

        bpm_list_data.append(bpm)
        spo2_list_data.append(spo2)

    # Sample k is taken at k / fs seconds.
    timestamp = np.arange(len(red_filtered)) / fs

    plt.figure(figsize=(10, 5))
    ax1 = plt.subplot(211)
    plt.plot(timestamp, red, label='Red Signal', color='red', alpha=0.5)
    plt.plot(timestamp, ir, label='IR Signal', color='blue', alpha=0.5)
    plt.title('Raw PPG Signals')
    plt.xlabel("seconds")
    plt.ylabel('Amplitude')
    plt.legend()

    plt.subplot(212, sharex=ax1)
    plt.plot(timestamp, red_filtered, label='Filtered Red Signal', color='red', alpha=0.5)
    plt.plot(timestamp, ir_filtered, label='Filtered IR Signal', color='blue', alpha=0.5)
    plt.title('Filtered PPG Signals')
    plt.xlabel("seconds")
    plt.ylabel('Amplitude')
    plt.legend()
    plt.show()

    return np.asarray(bpm_list_data), np.asarray(spo2_list_data)
|
||||
|
||||
|
||||
|
||||
|
||||
def func_1(red, ir, fs=25):
    """
    Estimate a per-sample SpO2 trace from moving min/max envelopes and plot it.

    For each channel a centred moving window of ``fs`` samples gives the
    running maximum and minimum. Their midpoint is used as the baseline (DC)
    and their difference as the AC amplitude. SpO2 is then
    ``110 - 25 * (ac/dc_red) / (ac/dc_ir)``.

    Two figures are shown: raw signals, baseline-removed signals and AC/DC
    ratios; then the SpO2 trace.

    Args:
        red: red light samples (1-D array-like).
        ir: infrared light samples (1-D array-like).
        fs: sampling rate in Hz; also the moving-window length in samples.

    Returns:
        numpy.ndarray: the SpO2 estimate for every sample.
    """

    def _rolling_envelope(samples, window):
        """Return (moving max, moving min) of a 1-D array, centred window."""
        rolling = pd.Series(samples).rolling(window, min_periods=1, center=True)
        return rolling.max().to_numpy(), rolling.min().to_numpy()

    # Work on flat float arrays. The previous DataFrame-based version
    # produced (N, 1) envelopes, so `red - baseline` broadcast to (N, N).
    red = np.asarray(red, dtype=float).reshape(-1)
    ir = np.asarray(ir, dtype=float).reshape(-1)

    # AC/DC for the infrared channel.
    max_ir, min_ir = _rolling_envelope(ir, fs)
    baseline_data_ir = (max_ir + min_ir) / 2
    acDivDcIr = (max_ir - min_ir) / baseline_data_ir

    # AC/DC for the red channel.
    max_red, min_red = _rolling_envelope(red, fs)
    baseline_data_red = (max_red + min_red) / 2
    acDivDcRed = (max_red - min_red) / baseline_data_red

    # A flat IR window has zero AC; let that yield inf/nan without warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        SPO2 = 110 - 25 * (acDivDcRed / acDivDcIr)

    # Sample k is taken at k / fs seconds.
    timestamp = np.arange(len(red)) / fs

    plt.figure(figsize=(10, 5))
    ax1 = plt.subplot(311)
    plt.plot(timestamp, red, label='Red Signal', color='red', alpha=0.5)
    plt.plot(timestamp, ir, label='IR Signal', color='blue', alpha=0.5)
    plt.title('Raw PPG Signals')
    plt.xlabel("seconds")
    plt.ylabel('Amplitude')
    plt.legend()

    plt.subplot(312, sharex=ax1)
    plt.plot(timestamp, red - baseline_data_red, label='Detrended Red Signal', color='red', alpha=0.5)
    plt.plot(timestamp, ir - baseline_data_ir, label='Detrended IR Signal', color='blue', alpha=0.5)
    plt.title('Detrended PPG Signals')
    plt.xlabel("seconds")
    plt.ylabel('Amplitude')
    plt.legend()

    plt.subplot(313, sharex=ax1)
    plt.plot(timestamp, acDivDcRed, label='AC/DC Red', color='red', alpha=0.5)
    plt.plot(timestamp, acDivDcIr, label='AC/DC IR', color='blue', alpha=0.5)
    plt.title('AC/DC Ratios')
    plt.xlabel("seconds")
    plt.ylabel('Ratio')
    plt.legend()
    plt.show()

    # Second figure: the SpO2 trace on its own.
    plt.figure()
    plt.plot(timestamp, SPO2)
    plt.xlabel("Samples")
    plt.ylabel("SPO2")
    plt.title("SPO2")
    plt.show()

    return SPO2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
100
process_spo2_alg_info.py
Normal file
100
process_spo2_alg_info.py
Normal file
@ -0,0 +1,100 @@
|
||||
import re
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
def parse_line(line: str) -> Dict:
    """
    Parse one line of an algorithm-info dump into a flat dictionary.

    A line starts with a bracketed integer timestamp followed by
    whitespace-separated blocks, for example:

    [300]心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1 R组:;均方根:0; 仪器:60

    Each block is handed to ``parse_block`` and the resulting key/value pairs
    are merged into the result.

    Args:
        line: one raw text line.

    Returns:
        A dict with a ``"timestamp"`` key (int) plus the parsed block fields,
        or ``None`` when the line is blank or does not start with a
        ``[number]`` timestamp.
    """
    line = line.strip()
    if not line:
        return None

    # Leading "[300]"-style timestamp.
    time_match = re.match(r'\[(\d+)\]', line)
    if not time_match:
        return None

    result = {"timestamp": int(time_match.group(1))}

    # Everything after the timestamp is a sequence of blocks. split() without
    # an argument collapses runs of whitespace, so doubled or trailing spaces
    # do not produce empty blocks.
    for block in line[time_match.end():].split():
        if ':' not in block:
            # Not a key/value block; nothing to extract.
            continue
        result.update(parse_block(block))
    return result
|
||||
|
||||
|
||||
def parse_block(block: str) -> Dict:
    """
    Parse one ``;``-separated block into a dictionary of string values.

    Example blocks:
        心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1
        血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1
        R组:;均方根:0;
        仪器:60

    A block without ``;`` is a single ``key:value`` pair and is stored under
    ``key``. Otherwise the key of the first part names the group and every
    part (including the first) is stored as ``"<group>_<key>"``. A part that
    contains ``结果延迟`` holds two fields glued together and is split into
    ``"<group>_结果延迟"`` plus the field preceding it.

    Fragments that contain no ``:`` are ignored.

    Args:
        block: one whitespace-free block of a line.

    Returns:
        Dict mapping field names to their (stripped) string values.
    """
    data = {}
    parts = block.split(';')

    if len(parts) == 1:
        key, sep, value = parts[0].partition(':')
        if sep:
            data[key.strip()] = value.strip()
        return data

    group = parts[0].partition(':')[0].strip()
    for part in parts:
        if not part.strip():
            continue

        if "结果延迟" in part:
            # e.g. "信任级别:0结果延迟:1" - two fields without a separator.
            head, _, delay = part.partition('结果延迟:')
            data[f"{group}_结果延迟"] = delay.strip()
            key, _, value = head.partition(':')
            if key and value:
                data[f"{group}_{key.strip()}"] = value.strip()
            continue

        key, sep, value = part.partition(':')
        if sep:
            data[f"{group}_{key.strip()}"] = value.strip()

    return data
|
||||
|
||||
|
||||
|
||||
def parse_dat_file(file_path) -> pd.DataFrame:
    """
    Read a whole algorithm-info .dat file into a pandas DataFrame.

    Every line is passed to ``parse_line``. Non-blank lines that cannot be
    parsed are reported on stdout and skipped.

    Args:
        file_path: path of the UTF-8 encoded .dat file.

    Returns:
        DataFrame with one row per parsed line, sorted by ``timestamp``
        (empty when nothing could be parsed).
    """
    rows = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for line_num, raw in enumerate(handle, 1):
            record = parse_line(raw)
            if record:
                rows.append(record)
                continue
            text = raw.strip()
            if text:
                print(f"第 {line_num} 行解析失败,已跳过: {text[:80]}...")

    frame = pd.DataFrame(rows)
    if frame.empty:
        return frame

    # Lines are normally already ordered; sort anyway to be safe.
    return frame.sort_values("timestamp").reset_index(drop=True)
|
||||
|
||||
if __name__ == '__main__':
    # Convert one algorithm-info dump to a CSV file next to it.
    # file_path = "./data/spo2_alg_info[42-A3-C5-2F-F7-32]2025.11.17_17.01.14.dat"
    file_path = Path("./data/spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.dat")
    df = parse_dat_file(file_path)

    # utf-8-sig writes a byte-order mark at the start of the file.
    csv_path = file_path.with_suffix('.csv')
    df.to_csv(csv_path, index=False, encoding='utf-8-sig')
|
||||
52
process_spo2_hr.py
Normal file
52
process_spo2_hr.py
Normal file
@ -0,0 +1,52 @@
|
||||
'''样例数据
|
||||
red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169
|
||||
red:9185136;ied:10744763;accX:209;accY:-438;accZ:-171
|
||||
red:9182875;ied:10744312;accX:209;accY:-438;accZ:-171
|
||||
red:9181788;ied:10744212;accX:209;accY:-441;accZ:-163
|
||||
red:9180729;ied:10744341;accX:209;accY:-441;accZ:-163
|
||||
red:9179405;ied:10742269;accX:212;accY:-440;accZ:-172
|
||||
red:9178563;ied:10741796;accX:212;accY:-440;accZ:-172
|
||||
red:9178557;ied:10742843;accX:209;accY:-440;accZ:-166
|
||||
red:9178185;ied:10742777;accX:209;accY:-440;accZ:-166
|
||||
red:9177053;ied:10737766;accX:210;accY:-440;accZ:-166
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
def parse_dat_line(line: str) -> dict:
    """
    Parse one line of a raw-sample .dat file into a dict of integers.

    Example input:
        red:9188561;ied:10745056;accX:209;accY:-438;accZ:-169

    Args:
        line: one text line of ``key:value`` pairs separated by ``;``.

    Returns:
        Dict mapping each key to its integer value; empty for a blank line.

    Raises:
        ValueError: if a non-empty part has no ``:`` or its value is not an
            integer.
    """
    fields = (part for part in line.strip().split(';') if part.strip())
    pairs = (part.split(':', 1) for part in fields)
    return {key.strip(): int(value.strip()) for key, value in pairs}
|
||||
def parse_dat_file(file_path) -> pd.DataFrame:
    """
    Read a whole raw-sample .dat file into a pandas DataFrame.

    Each non-blank line is parsed with ``parse_dat_line`` and becomes one row.
    Blank lines are skipped; previously they produced all-NaN rows, which
    also turned the integer columns into floats.

    Args:
        file_path: path of the UTF-8 encoded .dat file.

    Returns:
        DataFrame with one row per sample line and one column per key.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            parsed_line = parse_dat_line(line)
            if parsed_line:
                records.append(parsed_line)
    return pd.DataFrame(records)
|
||||
|
||||
if __name__ == '__main__':
    # Convert one raw-sample dump to a CSV file next to it.
    # file_path = './data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.01.13.dat'
    file_path = Path('./data/spo2hr[42-A3-C5-2F-F7-32]2025.11.17_17.12.15.dat')
    df = parse_dat_file(file_path)

    csv_path = file_path.with_suffix('.csv')
    df.to_csv(csv_path, index=False)
|
||||
114
spo2_pipeline.py
Normal file
114
spo2_pipeline.py
Normal file
@ -0,0 +1,114 @@
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.signal import butter, filtfilt, find_peaks
|
||||
|
||||
def bandpass_filter(x, fs, low=0.5, high=5.0, order=3):
    """
    Apply a zero-phase Butterworth band-pass filter to ``x``.

    Args:
        x: samples to filter.
        fs: sampling rate in Hz.
        low: lower cut-off frequency in Hz.
        high: upper cut-off frequency in Hz.
        order: Butterworth filter order passed to ``butter``.

    Returns:
        The filtered signal (forward-backward filtered with ``filtfilt``).
    """
    nyquist = 0.5 * fs
    # butter() expects the band edges normalised to the Nyquist frequency.
    band_edges = [low / nyquist, high / nyquist]
    numerator, denominator = butter(order, band_edges, btype='band')
    return filtfilt(numerator, denominator, x)
|
||||
|
||||
def detrend(x):
    """Return ``x`` with its mean subtracted (no slope is removed)."""
    offset = np.mean(x)
    return np.subtract(x, offset)
|
||||
|
||||
def detect_peaks(signal, fs, hr_min=40, hr_max=220):
    """
    Locate beat peaks in a signal.

    Peaks must be at least one shortest-possible beat interval apart
    (``fs * 60 / hr_max`` samples) and have a prominence of at least 0.3
    standard deviations of the signal.

    Args:
        signal: 1-D samples to search.
        fs: sampling rate in Hz.
        hr_min: lowest expected heart rate in bpm. Accepted but not used.
        hr_max: highest expected heart rate in bpm; sets the minimum peak
            spacing.

    Returns:
        tuple: ``(peaks, props)`` exactly as returned by
        ``scipy.signal.find_peaks``.
    """
    # find_peaks rejects distance < 1, which the floor can produce when fs
    # is low relative to hr_max; clamp to the smallest legal spacing.
    min_dist = max(1, int(np.floor(fs * 60.0 / hr_max)))
    prominence = 0.3 * np.std(signal)
    peaks, props = find_peaks(signal, distance=min_dist, prominence=prominence)
    return peaks, props
|
||||
|
||||
def compute_ac_dc_per_beat(signal, peaks, fs):
    """
    Compute AC and DC values for the window around every peak.

    The window of a peak runs from the midpoint to the previous peak (or
    sample 0 for the first peak) to the midpoint to the next peak (or the
    last sample for the last peak), both ends inclusive. AC is half the
    peak-to-peak range of the window and DC is its mean. Windows with fewer
    than 3 samples yield NaN for both.

    Args:
        signal: 1-D numpy array of samples.
        peaks: sample indices of the detected peaks.
        fs: sampling rate in Hz, used to convert peak indices to seconds.

    Returns:
        tuple: ``(AC, DC, times, beat_windows)`` - three numpy arrays with one
        entry per peak and a list of ``(start, end)`` index pairs.
    """
    last_sample = len(signal) - 1
    last_beat = len(peaks) - 1
    AC, DC, times, beat_windows = [], [], [], []

    for i, p in enumerate(peaks):
        start = 0 if i == 0 else (peaks[i-1] + p) // 2
        end = last_sample if i == last_beat else (p + peaks[i+1]) // 2
        segment = signal[start:end+1]

        if segment.size < 3:
            # Too few samples for a meaningful range / mean.
            ac = dc = np.nan
        else:
            ac = (np.max(segment) - np.min(segment)) / 2.0
            dc = np.mean(segment)

        AC.append(ac)
        DC.append(dc)
        times.append(p / fs)
        beat_windows.append((start, end))

    return np.array(AC), np.array(DC), np.array(times), beat_windows
|
||||
|
||||
def compute_spo2_from_R(R, A=104.0, B=17.0):
    """Map the ratio ``R`` to SpO2 with the linear model ``A - B * R``."""
    scaled = B * R
    return A - scaled
|
||||
|
||||
def calibrate_linear(R_vals, spo2_ref):
    """
    Fit the coefficients of ``SpO2 = A - B * R`` by least squares.

    Pairs in which either value is NaN are ignored.

    Args:
        R_vals: numpy array of ratio values.
        spo2_ref: numpy array of reference SpO2 values, same length.

    Returns:
        ``(A, B)`` as floats, or ``None`` when fewer than two usable pairs
        remain.
    """
    usable = ~(np.isnan(R_vals) | np.isnan(spo2_ref))
    if usable.sum() < 2:
        return None

    # polyfit returns the highest-degree coefficient first.
    slope, intercept = np.polyfit(R_vals[usable], spo2_ref[usable], 1)
    return float(intercept), float(-slope)
|
||||
|
||||
def spo2_pipeline(red, ir, fs=25, calibrate_with_ref=None, A=104.0, B=17.0):
    """
    Beat-by-beat SpO2 estimation from red and infrared PPG signals.

    Steps:
      1. Band-pass filter both channels and remove their mean.
      2. Detect beats on the filtered IR channel.
      3. Per beat window, take AC from the filtered signal and DC from the
         raw signal.
      4. ``R = (AC_red / DC_red) / (AC_ir / DC_ir)`` and ``SpO2 = A - B * R``.

    Args:
        red: 1-D array of raw red samples.
        ir: 1-D array of raw infrared samples, same shape as ``red``.
        fs: sampling rate in Hz.
        calibrate_with_ref: accepted for interface compatibility but
            currently ignored - calibration against reference values is
            disabled, so ``A`` and ``B`` are always used as given.
        A: intercept of the linear SpO2 model.
        B: slope of the linear SpO2 model.

    Returns:
        tuple: ``(df, summary)``. ``df`` has one row per beat with the columns
        ``time_s, AC_red, DC_red, AC_ir, DC_ir, R, SpO2, valid``; ``summary``
        is a dict with beat counts, mean/median SpO2 over valid beats and the
        coefficients used.
    """
    red = np.asarray(red, dtype=float)
    ir = np.asarray(ir, dtype=float)
    assert red.shape == ir.shape, "red and ir must have same shape"

    red_d = detrend(bandpass_filter(red, fs))
    ir_d = detrend(bandpass_filter(ir, fs))

    peaks, _ = detect_peaks(ir_d, fs)

    # AC: pulsatile amplitude, taken from the filtered signals.
    ac_red, _, times, _ = compute_ac_dc_per_beat(red_d, peaks, fs)
    ac_ir, _, _, _ = compute_ac_dc_per_beat(ir_d, peaks, fs)

    # DC: baseline level. It must come from the raw signals - the band-pass
    # filter removes it, which left the previous "DC" near zero with random
    # sign and made R meaningless.
    _, dc_red, _, _ = compute_ac_dc_per_beat(red, peaks, fs)
    _, dc_ir, _, _ = compute_ac_dc_per_beat(ir, peaks, fs)

    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_red = ac_red / dc_red
        ratio_ir = ac_ir / dc_ir
        R = ratio_red / ratio_ir

    spo2_vals = compute_spo2_from_R(R, A=A, B=B)
    used_A, used_B = A, B

    df = pd.DataFrame({
        "time_s": times,
        "AC_red": ac_red,
        "DC_red": dc_red,
        "AC_ir": ac_ir,
        "DC_ir": dc_ir,
        "R": R,
        "SpO2": spo2_vals,
    })

    # A beat is valid when every ingredient is positive and the estimate is
    # inside the (50, 100) range.
    df['valid'] = (
        (~np.isnan(df['R'])) &
        (df['AC_red'] > 0) & (df['AC_ir'] > 0) &
        (df['DC_red'] > 0) & (df['DC_ir'] > 0) &
        (df['SpO2'] > 50) & (df['SpO2'] < 100)
    )

    valid_spo2 = df.loc[df['valid'], 'SpO2']
    has_valid = bool(df['valid'].any())
    summary = {
        "n_beats": len(df),
        "n_valid_beats": int(df['valid'].sum()),
        "mean_spo2": float(np.nanmean(valid_spo2)) if has_valid else float('nan'),
        "median_spo2": float(np.nanmedian(valid_spo2)) if has_valid else float('nan'),
        "used_A": used_A,
        "used_B": used_B,
    }
    return df, summary
|
||||
Loading…
Reference in New Issue
Block a user