112 lines
4.1 KiB
Python
112 lines
4.1 KiB
Python
"""
|
|
本脚本完成对呼研所数据的处理,包含以下功能:
|
|
1. 数据读取与预处理
|
|
从传入路径中,进行数据和标签的读取,并进行初步的预处理
|
|
预处理包括为数据进行滤波、去噪等操作
|
|
2. 数据清洗与异常值处理
|
|
3. 输出清洗后的统计信息
|
|
4. 数据保存
|
|
将处理后的数据保存到指定路径,便于后续使用
|
|
主要是保存切分后的数据位置和标签
|
|
5. 可视化
|
|
提供数据处理前后的可视化对比,帮助理解数据变化
|
|
绘制多条可用性趋势图,展示数据的可用区间、体动区间、低幅值区间等
|
|
|
|
todo: 使用mask 屏蔽无用区间
|
|
|
|
|
|
# 低幅值区间规则标定与剔除
|
|
# 高幅值连续体动规则标定与剔除
|
|
# 手动标定不可用区间的剔除
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from typing import Union
|
|
import utils
|
|
import numpy as np
|
|
import signal_method
|
|
|
|
|
|
|
|
def process_one_signal(samp_id):
    """Load, filter and mask the recording of one sample (work in progress).

    Relies on module-level names that the ``__main__`` section assigns before
    this function is called: ``conf``, ``org_signal_root_path``,
    ``label_root_path`` and ``all_samp_disable_df``.

    Args:
        samp_id: Sample identifier. It is used as the sub-directory name under
            both root paths and to select the rows of ``all_samp_disable_df``
            whose ``id`` column equals it.

    Raises:
        FileNotFoundError: If no ``OrgBCG_Sync_*.txt`` signal file or no
            ``SA Label_corrected.csv`` label file exists for ``samp_id``.
    """
    signal_candidates = list((org_signal_root_path / f"{samp_id}").glob("OrgBCG_Sync_*.txt"))
    if not signal_candidates:
        raise FileNotFoundError(f"OrgBCG_Sync file not found for sample ID: {samp_id}")
    signal_path = signal_candidates[0]
    print(f"Processing OrgBCG_Sync signal file: {signal_path}")

    # Path.glob() returns a lazy generator, which is always truthy. It has to
    # be materialised first, otherwise the emptiness check can never fire and
    # a missing file shows up later as an IndexError.
    label_candidates = list((label_root_path / f"{samp_id}").glob("SA Label_corrected.csv"))
    if not label_candidates:
        raise FileNotFoundError(f"Label_corrected file not found for sample ID: {samp_id}")
    label_path = label_candidates[0]
    print(f"Processing Label_corrected file: {label_path}")

    signal_data = utils.read_signal_txt(signal_path)
    signal_length = len(signal_data)
    print(f"signal_length: {signal_length}")
    # The sampling rate is taken from the last "_"-separated token of the
    # file stem (OrgBCG_Sync_<fs>.txt).
    signal_fs = int(signal_path.stem.split("_")[-1])
    print(f"signal_fs: {signal_fs}")
    # Whole seconds only; a trailing partial second is dropped.
    signal_second = signal_length // signal_fs
    print(f"signal_second: {signal_second}")

    # Filtering.
    # A pre-filter over the raw signal is currently disabled (the original
    # note mentions a 50 Hz notch filter):
    # signal_data = utils.butterworth(data=signal_data, _type="bandpass", low_cut=0.5, high_cut=45, order=10, sample_rate=signal_fs)
    resp_data = utils.butterworth(
        data=signal_data,
        _type=conf["resp"]["filter_type"],
        low_cut=conf["resp"]["low_cut"],
        high_cut=conf["resp"]["high_cut"],
        order=conf["resp"]["order"],
        sample_rate=signal_fs,
    )
    bcg_data = utils.butterworth(
        data=signal_data,
        _type=conf["bcg"]["filter_type"],
        low_cut=conf["bcg"]["low_cut"],
        high_cut=conf["bcg"]["high_cut"],
        order=conf["bcg"]["order"],
        sample_rate=signal_fs,
    )

    label_data = utils.read_label_csv(path=label_path)
    label_mask = utils.generate_event_mask(signal_second=signal_second, event_df=label_data)

    # Only the manually annotated exclusion rows of this sample are passed on.
    manual_disable_mask = utils.generate_disable_mask(
        signal_second=signal_second,
        disable_df=all_samp_disable_df[all_samp_disable_df["id"] == samp_id],
    )
    # num_disable counts the mask entries that are equal to 0.
    print(f"disable_mask_shape: {manual_disable_mask.shape}, num_disable: {np.sum(manual_disable_mask == 0)}")

    # Low-amplitude intervals of the respiration signal.
    # conf is subscripted like a mapping everywhere else in this function, so
    # the optional section is looked up the same way first; getattr() on a
    # plain dict would never find it and the detection would be skipped
    # silently. Attribute access is kept as a fallback.
    # NOTE(review): confirm the type returned by utils.load_dataset_conf.
    try:
        resp_low_amp_conf = conf["resp_low_amp"]
    except (KeyError, AttributeError):
        resp_low_amp_conf = getattr(conf, "resp_low_amp", None)

    if resp_low_amp_conf is not None:
        resp_low_amp_mask = signal_method.detect_low_amplitude_signal(
            signal_data=resp_data,
            sampling_rate=signal_fs,
            window_size_sec=resp_low_amp_conf["window_size_sec"],
            stride_sec=resp_low_amp_conf["stride_sec"],
            amplitude_threshold=resp_low_amp_conf["amplitude_threshold"],
            merge_gap_sec=resp_low_amp_conf["merge_gap_sec"],
            min_duration_sec=resp_low_amp_conf["min_duration_sec"],
        )
    else:
        resp_low_amp_mask = None

    # TODO: high-amplitude artifact intervals of the respiration signal are
    # not implemented yet. The original ended here with a dangling bare name
    # (`resp_move`) that raised NameError on every call. bcg_data, label_mask
    # and resp_low_amp_mask are computed above but not consumed yet.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the dataset configuration, derive the input
    # directories and process the first selected sample.
    # The names assigned here are module globals; process_one_signal reads
    # conf, org_signal_root_path, label_root_path and all_samp_disable_df.

    # Fixed input locations, relative to the current working directory.
    yaml_path = Path("./dataset_config/HYS_config.yaml")
    disable_df_path = Path("./排除区间.xlsx")

    conf = utils.load_dataset_conf(yaml_path)
    select_ids, root_path = conf["select_ids"], Path(conf["root_path"])
    print(f"select_ids: {select_ids}")
    print(f"root_path: {root_path}")

    # Signal and label files live in sibling folders under the dataset root.
    org_signal_root_path, label_root_path = (
        root_path / "OrgBCG_Aligned",
        root_path / "Label",
    )

    # Exclusion table for all samples; it is filtered per sample id later.
    all_samp_disable_df = utils.read_disable_excel(disable_df_path)

    # Only the first selected sample is processed for now.
    process_one_signal(select_ids[0])
|