""" 本脚本完成对呼研所数据的处理,包含以下功能: 1. 数据读取与预处理 从传入路径中,进行数据和标签的读取,并进行初步的预处理 预处理包括为数据进行滤波、去噪等操作 2. 数据清洗与异常值处理 3. 输出清晰后的统计信息 4. 数据保存 将处理后的数据保存到指定路径,便于后续使用 主要是保存切分后的数据位置和标签 5. 可视化 提供数据处理前后的可视化对比,帮助理解数据变化 绘制多条可用性趋势图,展示数据的可用区间、体动区间、低幅值区间等 todo: 使用mask 屏蔽无用区间 # 低幅值区间规则标定与剔除 # 高幅值连续体动规则标定与剔除 # 手动标定不可用区间提剔除 """ from pathlib import Path from typing import Union import utils import numpy as np import signal_method def process_one_signal(samp_id): signal_path = list((org_signal_root_path / f"{samp_id}").glob("OrgBCG_Sync_*.txt")) if not signal_path: raise FileNotFoundError(f"OrgBCG_Sync file not found for sample ID: {samp_id}") signal_path = signal_path[0] print(f"Processing OrgBCG_Sync signal file: {signal_path}") label_path = (label_root_path / f"{samp_id}").glob("SA Label_corrected.csv") if not label_path: raise FileNotFoundError(f"Label_corrected file not found for sample ID: {samp_id}") label_path = list(label_path)[0] print(f"Processing Label_corrected file: {label_path}") signal_data = utils.read_signal_txt(signal_path) signal_length = len(signal_data) print(f"signal_length: {signal_length}") signal_fs = int(signal_path.stem.split("_")[-1]) print(f"signal_fs: {signal_fs}") signal_second = signal_length // signal_fs print(f"signal_second: {signal_second}") # 滤波 # 50Hz陷波滤波器 # signal_data = utils.butterworth(data=signal_data, _type="bandpass", low_cut=0.5, high_cut=45, order=10, sample_rate=signal_fs) resp_data = utils.butterworth(data=signal_data, _type=conf["resp"]["filter_type"], low_cut=conf["resp"]["low_cut"], high_cut=conf["resp"]["high_cut"], order=conf["resp"]["order"], sample_rate=signal_fs) bcg_data = utils.butterworth(data=signal_data, _type=conf["bcg"]["filter_type"], low_cut=conf["bcg"]["low_cut"], high_cut=conf["bcg"]["high_cut"], order=conf["bcg"]["order"], sample_rate=signal_fs) label_data = utils.read_label_csv(path=label_path) label_mask = utils.generate_event_mask(signal_second=signal_second, event_df=label_data) manual_disable_mask = utils.generate_disable_mask(signal_second=signal_second, disable_df=all_samp_disable_df[all_samp_disable_df["id"] == samp_id]) print(f"disable_mask_shape: {manual_disable_mask.shape}, num_disable: {np.sum(manual_disable_mask == 0)}") # 分析Resp的低幅值区间 resp_low_amp_conf = getattr(conf, "resp_low_amp", None) if resp_low_amp_conf is not None: resp_low_amp_mask = signal_method.detect_low_amplitude_signal( signal_data=resp_data, sampling_rate=signal_fs, window_size_sec=resp_low_amp_conf["window_size_sec"], stride_sec=resp_low_amp_conf["stride_sec"], amplitude_threshold=resp_low_amp_conf["amplitude_threshold"], merge_gap_sec=resp_low_amp_conf["merge_gap_sec"], min_duration_sec=resp_low_amp_conf["min_duration_sec"] ) else: resp_low_amp_mask = None # 分析Resp的高幅值伪迹区间 resp_move if __name__ == '__main__': yaml_path = Path("./dataset_config/HYS_config.yaml") disable_df_path = Path("./排除区间.xlsx") conf = utils.load_dataset_conf(yaml_path) select_ids = conf["select_ids"] root_path = Path(conf["root_path"]) print(f"select_ids: {select_ids}") print(f"root_path: {root_path}") org_signal_root_path = root_path / "OrgBCG_Aligned" label_root_path = root_path / "Label" all_samp_disable_df = utils.read_disable_excel(disable_df_path) process_one_signal(select_ids[0])