"""Convert a timestamped key/value ``.dat`` log into a CSV table."""
import re
from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd

# Leading "[<digits>]" marker that must open every record.
_TIMESTAMP_RE = re.compile(r'\[(\d+)\]')
# Field name that appears glued to the previous field without a ';' separator.
_DELAY_KEY = '结果延迟'


def parse_line(line: str) -> Optional[Dict]:
    """Parse one record line into a flat dictionary.

    A record looks like::

        [300]心率:78,83,;值:82;信任级别:1 血氧组:;值:0;信任级别:0结果延迟:1 R组:;均方根:0; 仪器:60

    The bracketed integer becomes ``result["timestamp"]``. The remainder is
    split on single spaces into blocks and every block is handed to
    :func:`parse_block`; the resulting dictionaries are merged in order.

    Args:
        line: Raw text of one line (surrounding whitespace is ignored).

    Returns:
        The merged dictionary, or ``None`` when the line is blank or does not
        start with a ``[<digits>]`` marker.
    """
    line = line.strip()
    if not line:
        return None

    time_match = _TIMESTAMP_RE.match(line)
    if not time_match:
        return None

    result = {"timestamp": int(time_match.group(1))}
    # Everything after the marker is a space-separated list of blocks.
    for block in line[time_match.end():].split(" "):
        # Consecutive spaces (or a space right after the marker) yield empty
        # blocks; they carry no data and must not abort the line.
        if not block:
            continue
        result.update(parse_block(block))
    return result


def parse_block(block: str) -> Dict:
    """Parse one space-delimited block into ``{key: value}`` string pairs.

    Two shapes are handled:

    * A block without ``;`` (e.g. ``仪器:60``) yields a single entry keyed by
      the text before the first ``:``.
    * A block with ``;`` (e.g. ``R组:;均方根:0;``) is split into fields. Each
      field is stored as ``"<prefix>_<field key>"``, where ``<prefix>`` is the
      key of the first field; the first field itself is therefore stored as
      ``"<prefix>_<prefix>"``.

    A field containing ``结果延迟`` is treated as two fields glued together
    (``信任级别:0结果延迟:1``): the part after ``结果延迟:`` is stored under
    ``"<prefix>_结果延迟"`` and the part before it is parsed as a normal field
    when both its key and value are non-empty.

    Fields that contain no ``:`` are ignored instead of raising.

    Args:
        block: Text of one block, without surrounding spaces.

    Returns:
        Dictionary of stripped string keys to stripped string values; empty
        when the block holds nothing parseable.
    """
    data = {}
    parts = block.split(';')

    if len(parts) == 1:
        key, sep, value = parts[0].partition(':')
        if sep:
            data[key.strip()] = value.strip()
        return data

    prefix = parts[0].partition(':')[0].strip()
    for part in parts:
        if not part.strip():
            # Trailing ';' leaves an empty field behind.
            continue

        if _DELAY_KEY in part:
            head, _, delay = part.partition(f'{_DELAY_KEY}:')
            data[f"{prefix}_{_DELAY_KEY}"] = delay.strip()
            key, _, value = head.partition(':')
            if key and value:
                data[f"{prefix}_{key.strip()}"] = value.strip()
            continue

        key, sep, value = part.partition(':')
        if sep:
            data[f"{prefix}_{key.strip()}"] = value.strip()
    return data


def parse_dat_file(file_path) -> pd.DataFrame:
    """Read a whole ``.dat`` file and return its records as a DataFrame.

    Each line is passed to :func:`parse_line`. Non-blank lines that cannot be
    parsed are reported on stdout with their 1-based line number and skipped.

    Args:
        file_path: Path of the text file to read (anything ``open`` accepts).

    Returns:
        One row per parsed record, sorted by ``timestamp`` with a fresh
        0-based index. Rows sharing a timestamp keep their file order. The
        frame is empty when no line could be parsed.
    """
    data = []
    # 'utf-8-sig' reads plain UTF-8 as well and drops a leading BOM, which
    # would otherwise make the first record fail the "[ts]" match.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for line_num, line in enumerate(f, 1):
            parsed = parse_line(line)
            if parsed:
                data.append(parsed)
            elif line.strip():
                print(f"第 {line_num} 行解析失败,已跳过: {line.strip()[:80]}...")

    df = pd.DataFrame(data)
    # Input is normally already ordered; sort anyway, using a stable
    # algorithm so equal timestamps are not shuffled.
    if not df.empty:
        df = df.sort_values("timestamp", kind="mergesort").reset_index(drop=True)
    return df


if __name__ == '__main__':
    # Alternative input:
    # "./data/spo2_alg_info[42-A3-C5-2F-F7-32]2025.11.17_17.01.14.dat"
    file_path = Path("./data/spo2_alg_info[42_A3_C5_2F_F7_32]2025.11.17_17.12.16.dat")
    df = parse_dat_file(file_path)
    # The CSV is written next to the input, with the suffix swapped to .csv.
    df.to_csv(file_path.with_suffix('.csv'), index=False, encoding='utf-8-sig')