PPG_SpO2/process_spo2_alg_info.py

100 lines
3.9 KiB
Python

import re
from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd
def parse_line(line: str) -> Optional[Dict]:
    """Parse one log line into a flat dictionary.

    A line starts with a bracketed integer timestamp. The remainder is split
    on single spaces into blocks, each block is handed to ``parse_block``,
    and the resulting entries are merged into one dictionary.

    Example input lines::

        [300]心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1 R组:;均方根:0; 仪器:60
        [375]心率:83,83,83,107,;标准差1:43.8422;标准差2:10.3923;均方根:90.1047;值:82;信任级别:0 血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:3 R组:;均方根:0; 仪器:60

    Args:
        line: One raw line of text; leading and trailing whitespace is
            ignored.

    Returns:
        A dictionary holding the integer ``"timestamp"`` plus every entry
        returned by ``parse_block`` for the line's blocks, or ``None`` when
        the line is blank or does not start with a ``[digits]`` timestamp.
    """
    line = line.strip()
    if not line:
        return None

    # The timestamp is the leading "[300]"-style prefix.
    time_match = re.match(r'\[(\d+)\]', line)
    if not time_match:
        return None

    result = {"timestamp": int(time_match.group(1))}

    # Everything after the timestamp is a space-separated list of blocks.
    for block in line[time_match.end():].split(" "):
        # A timestamp-only line, a space right after the timestamp, or two
        # consecutive spaces all produce empty blocks; there is nothing to
        # parse in them.
        if not block:
            continue
        result.update(parse_block(block))
    return result
def parse_block(block: str) -> Dict:
    """Parse one space-delimited block of a log line into a dictionary.

    A block is a ``;``-separated list of ``key:value`` fields. Example
    blocks::

        心率:78,83,83,83,;标准差1:43.0491;标准差2:2.16506;均方根:82.2787;值:82;信任级别:1
        血氧组:;标准差1:0;标准差:0;均方根:0;值:0;信任级别:0结果延迟:1
        R组:;均方根:0;
        仪器:60

    Key naming:
        * A block without any ``;`` yields a single ``{key: value}`` entry.
        * Otherwise every field is stored as ``"<group>_<key>"``, where
          ``<group>`` is the key of the block's first field (the first field
          itself is therefore stored as ``"<group>_<group>"``).
        * A field containing ``结果延迟`` is split at ``结果延迟:``; the text
          after it is stored as ``"<group>_结果延迟"`` and the text before it
          is parsed as an ordinary ``key:value`` field when both its key and
          value are non-empty.

    Args:
        block: The block text.

    Returns:
        A dictionary of stripped string keys to stripped string values.
        Blank blocks produce an empty dictionary, and fields that contain no
        ``:`` separator are skipped.
    """
    data = {}
    # Blank blocks carry no fields at all.
    if not block.strip():
        return data

    parts = block.split(';')
    if len(parts) == 1:
        # partition() never raises; an empty separator means "no colon".
        key, sep, value = parts[0].partition(':')
        if sep:
            data[key.strip()] = value.strip()
        return data

    # The key of the first field names the group and prefixes every entry.
    key_0 = parts[0].split(':', 1)[0].strip()
    for part in parts:
        if not part.strip():
            continue
        if "结果延迟" in part:
            # The delay value is glued onto the preceding field without a
            # ';' (e.g. "信任级别:0结果延迟:1"), so split it off first.
            part0, _, value1 = part.partition('结果延迟:')
            data[f"{key_0}_结果延迟"] = value1.strip()
            key2, _, value2 = part0.partition(':')
            if key2 and value2:
                data[f"{key_0}_{key2.strip()}"] = value2.strip()
            continue
        key, sep, value = part.partition(':')
        if sep:
            data[f"{key_0}_{key.strip()}"] = value.strip()
    return data
def parse_dat_file(file_path) -> pd.DataFrame:
    """Read a whole .dat file and return its parsed lines as a DataFrame.

    The file is read as UTF-8 text, one line at a time. Each line goes
    through ``parse_line``; successfully parsed lines become rows. Non-blank
    lines that cannot be parsed are reported on standard output (with their
    1-based line number and first 80 characters) and skipped.

    Args:
        file_path: Path of the file to read, in any form accepted by
            ``open``.

    Returns:
        A DataFrame with one row per parsed line, sorted by the
        ``"timestamp"`` column with a fresh 0-based index. The DataFrame is
        empty when no line could be parsed.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            try:
                parsed = parse_line(line)
            except ValueError:
                # One malformed field must not abort the whole file; treat
                # the line like any other unparseable one and report it.
                parsed = None
            if parsed:
                data.append(parsed)
            elif line.strip():
                print(f"第 {line_num} 行解析失败,已跳过: {line.strip()[:80]}...")

    df = pd.DataFrame(data)
    # Sort by timestamp (normally already in order, but just to be safe).
    # A stable sort keeps rows that share a timestamp in file order.
    if not df.empty:
        df = df.sort_values("timestamp", kind="mergesort").reset_index(drop=True)
    return df
if __name__ == '__main__':
    import sys

    # Convert one capture file to CSV. The file to convert may be passed as
    # the first command-line argument, for example
    #   "./data/spo2_alg_info[42-A3-C5-2F-F7-32]2025.11.17_17.01.14.dat"
    # When no argument is given, the default capture below is used.
    default_path = "./data/spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.dat"
    file_path = Path(sys.argv[1] if len(sys.argv) > 1 else default_path)

    df = parse_dat_file(file_path)
    # The CSV is written next to the input, with the final suffix replaced
    # by ".csv". "utf-8-sig" prepends a BOM -- presumably so spreadsheet
    # tools detect the encoding of the non-ASCII column names.
    df.to_csv(file_path.with_suffix('.csv'), index=False, encoding='utf-8-sig')