from pathlib import Path
from typing import Union

import numpy as np
import pandas as pd

# Polars is optional: use it for fast signal loading when it is installed.
try:
    import polars as pl

    HAS_POLARS = True
except ImportError:
    HAS_POLARS = False


# (label printed in the statistics table, value stored in the CSV columns)
_EVENT_TYPES = (
    ("Hypopnea:", "Hypopnea"),
    ("Central apnea:", "Central apnea"),
    ("Obstructive ap:", "Obstructive apnea"),
    ("Mixed apnea:", "Mixed apnea"),
)


def read_signal_txt(path: Union[str, Path]) -> np.ndarray:
    """
    Read a txt file and return the first column as a numpy array.

    Args:
        path (str | Path): Path to the txt file.

    Returns:
        np.ndarray: The first column of the txt file as a float numpy array.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    if HAS_POLARS:
        # infer_schema_length=0 makes Polars read every column as a string,
        # so cast explicitly; otherwise this branch would return strings while
        # the pandas branch returns floats.
        df = pl.read_csv(path, has_header=False, infer_schema_length=0)
        return df[:, 0].cast(pl.Float64).to_numpy()

    df = pd.read_csv(path, header=None, dtype=float)
    return df.iloc[:, 0].to_numpy()


def _print_label_statistics(df: pd.DataFrame) -> None:
    """Print the Total / PSG / Manual / Deleted / Unlabeled event table for df."""
    # Column conventions used by the label file:
    #   isLabeled == 1            -> the row has been labeled
    #   "Event type" has a value  -> event exported from PSG
    #   "Event type" is NaN       -> event added manually
    #   score == 1 significant event, score == 2 event affected by
    #   interference, score == 3 non-significant event that should be deleted
    #   correct_EventsType        -> event type after confirmation
    # Columns:
    #   Index Event type Stage Time Epoch Date Duration HR bef. HR extr.
    #   HR delta O2 bef. O2 min. O2 delta Body Position Validation Start End
    #   score remark correct_Start correct_End correct_EventsType isLabeled
    # Event type values: Hypopnea, Central apnea, Obstructive apnea, Mixed apnea
    labeled = df["isLabeled"] == 1
    unlabeled = df["isLabeled"] == 0
    from_psg = df["Event type"].notna()
    manual = df["Event type"].isna()
    deleted = df["score"] == 3
    kept = ~deleted

    num_total = int((labeled & kept).sum())
    num_psg_events = int(from_psg.sum())
    # Manual events are the rows without a PSG event type; deriving this from
    # the labeled count goes wrong as soon as some PSG rows are unlabeled.
    num_manual_events = int(manual.sum())
    num_deleted = int(deleted.sum())
    # Count unlabeled rows directly; "total - labeled" can never be positive
    # because the total only contains labeled rows.
    num_unlabeled = int(unlabeled.sum())

    print("Event Statistics:")
    print("Type Total / PSG / Manual / Deleted / Unlabeled")
    for label, event_type in _EVENT_TYPES:
        confirmed = df["correct_EventsType"] == event_type
        n_total = int((confirmed & kept).sum())
        n_psg = int((df["Event type"] == event_type).sum())
        n_manual = int((manual & confirmed).sum())
        n_deleted = int((deleted & confirmed).sum())
        n_unlabeled = int((unlabeled & confirmed).sum())
        print(f"{label} {n_total:4d} / {n_psg:4d} / {n_manual:4d} / {n_deleted:4d} / {n_unlabeled:4d}")
    print(f"Total events: {num_total:4d} / {num_psg_events:4d} / {num_manual_events:4d} / {num_deleted:4d} / {num_unlabeled:4d}")


def read_label_csv(path: Union[str, Path], verbose: bool = True) -> pd.DataFrame:
    """
    Read a label CSV file and return it as a pandas DataFrame.

    The ``Start`` and ``End`` columns are converted to ``int``.

    Args:
        path (str | Path): Path to the CSV file.
        verbose (bool): When True, print the row count and a per-event-type
            statistics table. The statistics columns (``isLabeled``,
            ``Event type``, ``score``, ``correct_EventsType``) are only
            required in this case.

    Returns:
        pd.DataFrame: The content of the CSV file as a pandas DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    # The file contains Chinese text, so the encoding is given explicitly.
    df = pd.read_csv(path, encoding="gbk")

    if verbose:
        print(f"Label file read from {path}, number of rows: {len(df)}")
        _print_label_statistics(df)

    df["Start"] = df["Start"].astype(int)
    df["End"] = df["End"].astype(int)
    return df


def read_disable_excel(path: Union[str, Path]) -> pd.DataFrame:
    """
    Read an Excel file and return it as a pandas DataFrame.

    The ``id``, ``start`` and ``end`` columns are converted to ``int``.

    Args:
        path (str | Path): Path to the Excel file.

    Returns:
        pd.DataFrame: The content of the Excel file as a pandas DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    df = pd.read_excel(path)
    for column in ("id", "start", "end"):
        df[column] = df[column].astype(int)
    return df