Refactor event statistics calculations and improve output formatting

2025-10-12 20:30:46 +08:00 · 2025-10-12 20:30:46 +08:00 · 180d872cd7
commit 180d872cd7
parent d2ed6787d4
1 changed file with 49 additions and 20 deletions
--- a/utils/HYS_FileReader.py
+++ b/utils/HYS_FileReader.py
@ -7,6 +7,7 @@ import pandas as pd
 # 尝试导入 Polars
 try:
    import polars as pl
+
    HAS_POLARS = True
 except ImportError:
    HAS_POLARS = False
@ -67,14 +68,15 @@ def read_label_csv(path: Union[str, Path], verbose=True) -> pd.DataFrame:
    # Obstructive apnea
    # Mixed apnea

-    num_labeled = np.sum(df["isLabeled"] == 1)
+    num_total = np.sum((df["isLabeled"] == 1) & (df["score"] != 3))
+
    num_psg_events = np.sum(df["Event type"].notna())
-    num_manual_events = num_labeled - num_psg_events
+    num_manual_events = np.sum(df["Event type"].isna())
+
    num_deleted = np.sum(df["score"] == 3)

    # 统计事件
-    num_total = np.sum((df["isLabeled"] == 1) & (df["score"] != 3))
-    num_unlabeled = num_total - num_labeled
+    num_unlabeled = np.sum(df["isLabeled"] == -1)

    num_psg_hyp = np.sum(df["Event type"] == "Hypopnea")
    num_psg_csa = np.sum(df["Event type"] == "Central apnea")
@ -82,9 +84,9 @@ def read_label_csv(path: Union[str, Path], verbose=True) -> pd.DataFrame:
    num_psg_msa = np.sum(df["Event type"] == "Mixed apnea")

    num_hyp = np.sum((df["correct_EventsType"] == "Hypopnea") & (df["score"] != 3))
-    num_csa = np.sum((df["correct_EventsType"] == "Central apnea")  & (df["score"] != 3))
-    num_osa = np.sum((df["correct_EventsType"] == "Obstructive apnea")  & (df["score"] != 3))
-    num_msa = np.sum((df["correct_EventsType"] == "Mixed apnea")  & (df["score"] != 3))
+    num_csa = np.sum((df["correct_EventsType"] == "Central apnea") & (df["score"] != 3))
+    num_osa = np.sum((df["correct_EventsType"] == "Obstructive apnea") & (df["score"] != 3))
+    num_msa = np.sum((df["correct_EventsType"] == "Mixed apnea") & (df["score"] != 3))

    num_manual_hyp = np.sum((df["Event type"].isna()) & (df["correct_EventsType"] == "Hypopnea"))
    num_manual_csa = np.sum((df["Event type"].isna()) & (df["correct_EventsType"] == "Central apnea"))
@ -96,25 +98,52 @@ def read_label_csv(path: Union[str, Path], verbose=True) -> pd.DataFrame:
    num_deleted_osa = np.sum((df["score"] == 3) & (df["correct_EventsType"] == "Obstructive apnea"))
    num_deleted_msa = np.sum((df["score"] == 3) & (df["correct_EventsType"] == "Mixed apnea"))

-    num_unlabeled_hyp = np.sum((df["isLabeled"] == 0) & (df["correct_EventsType"] == "Hypopnea"))
-    num_unlabeled_csa = np.sum((df["isLabeled"] == 0) & (df["correct_EventsType"] == "Central apnea"))
-    num_unlabeled_osa = np.sum((df["isLabeled"] == 0) & (df["correct_EventsType"] == "Obstructive apnea"))
-    num_unlabeled_msa = np.sum((df["isLabeled"] == 0) & (df["correct_EventsType"] == "Mixed apnea"))
+    num_unlabeled_hyp = np.sum((df["isLabeled"] == -1) & (df["Event type"] == "Hypopnea"))
+    num_unlabeled_csa = np.sum((df["isLabeled"] == -1) & (df["Event type"] == "Central apnea"))
+    num_unlabeled_osa = np.sum((df["isLabeled"] == -1) & (df["Event type"] == "Obstructive apnea"))
+    num_unlabeled_msa = np.sum((df["isLabeled"] == -1) & (df["Event type"] == "Mixed apnea"))

+    num_hyp_1_score = np.sum((df["correct_EventsType"] == "Hypopnea") & (df["score"] == 1))
+    num_csa_1_score = np.sum((df["correct_EventsType"] == "Central apnea") & (df["score"] == 1))
+    num_osa_1_score = np.sum((df["correct_EventsType"] == "Obstructive apnea") & (df["score"] == 1))
+    num_msa_1_score = np.sum((df["correct_EventsType"] == "Mixed apnea") & (df["score"] == 1))

+    num_hyp_2_score = np.sum((df["correct_EventsType"] == "Hypopnea") & (df["score"] == 2))
+    num_csa_2_score = np.sum((df["correct_EventsType"] == "Central apnea") & (df["score"] == 2))
+    num_osa_2_score = np.sum((df["correct_EventsType"] == "Obstructive apnea") & (df["score"] == 2))
+    num_msa_2_score = np.sum((df["correct_EventsType"] == "Mixed apnea") & (df["score"] == 2))
+
+    num_hyp_3_score = np.sum((df["correct_EventsType"] == "Hypopnea") & (df["score"] == 3))
+    num_csa_3_score = np.sum((df["correct_EventsType"] == "Central apnea") & (df["score"] == 3))
+    num_osa_3_score = np.sum((df["correct_EventsType"] == "Obstructive apnea") & (df["score"] == 3))
+    num_msa_3_score = np.sum((df["correct_EventsType"] == "Mixed apnea") & (df["score"] == 3))
+
+    num_1_score = np.sum(df["score"] == 1)
+    num_2_score = np.sum(df["score"] == 2)
+    num_3_score = np.sum(df["score"] == 3)

    if verbose:
        print("Event Statistics:")
        # 格式化输出 总计/来自PSG/手动/删除/未标注 指定宽度
-        print("Type          Total / PSG / Manual / Deleted / Unlabeled")
-        print(f"Hypopnea:       {num_hyp:4d} / {num_psg_hyp:4d} / {num_manual_hyp:4d} / {num_deleted_hyp:4d} / {num_unlabeled_hyp:4d}")
-        print(f"Central apnea:  {num_csa:4d} / {num_psg_csa:4d} / {num_manual_csa:4d} / {num_deleted_csa:4d} / {num_unlabeled_csa:4d}")
-        print(f"Obstructive ap: {num_osa:4d} / {num_psg_osa:4d} / {num_manual_osa:4d} / {num_deleted_osa:4d} / {num_unlabeled_osa:4d}")
-        print(f"Mixed apnea:    {num_msa:4d} / {num_psg_msa:4d} / {num_manual_msa:4d} / {num_deleted_msa:4d} / {num_unlabeled_msa:4d}")
-        print(f"Total events:   {num_total:4d} / {num_psg_events:4d} / {num_manual_events:4d} / {num_deleted:4d} / {num_unlabeled:4d}")
-
-
+        print(f"Type {'Total':^8s} / {'From PSG':^8s} / {'Manual':^8s} / {'Deleted':^8s} / {'Unlabeled':^8s}")
+        print(
+            f"Hyp: {num_hyp:^8d} / {num_psg_hyp:^8d} / {num_manual_hyp:^8d} / {num_deleted_hyp:^8d} / {num_unlabeled_hyp:^8d}")
+        print(
+            f"CSA: {num_csa:^8d} / {num_psg_csa:^8d} / {num_manual_csa:^8d} / {num_deleted_csa:^8d} / {num_unlabeled_csa:^8d}")
+        print(
+            f"OSA: {num_osa:^8d} / {num_psg_osa:^8d} / {num_manual_osa:^8d} / {num_deleted_osa:^8d} / {num_unlabeled_osa:^8d}")
+        print(
+            f"MSA: {num_msa:^8d} / {num_psg_msa:^8d} / {num_manual_msa:^8d} / {num_deleted_msa:^8d} / {num_unlabeled_msa:^8d}")
+        print(
+            f"All: {num_total:^8d} / {num_psg_events:^8d} / {num_manual_events:^8d} / {num_deleted:^8d} / {num_unlabeled:^8d}")

+        print("Score Statistics (only for non-deleted events and manual created events):")
+        print(f"Type {'Total':^8s} / {'Score 1':^8s} / {'Score 2':^8s} / {'Score 3':^8s}")
+        print(f"Hyp: {num_hyp:^8d} / {num_hyp_1_score:^8d} / {num_hyp_2_score:^8d} / {num_hyp_3_score:^8d}")
+        print(f"CSA: {num_csa:^8d} / {num_csa_1_score:^8d} / {num_csa_2_score:^8d} / {num_csa_3_score:^8d}")
+        print(f"OSA: {num_osa:^8d} / {num_osa_1_score:^8d} / {num_osa_2_score:^8d} / {num_osa_3_score:^8d}")
+        print(f"MSA: {num_msa:^8d} / {num_msa_1_score:^8d} / {num_msa_2_score:^8d} / {num_msa_3_score:^8d}")
+        print(f"All: {num_total:^8d} / {num_1_score:^8d} / {num_2_score:^8d} / {num_3_score:^8d}")

    df["Start"] = df["Start"].astype(int)
    df["End"] = df["End"].astype(int)
@ -139,4 +168,4 @@ def read_disable_excel(path: Union[str, Path]) -> pd.DataFrame:
    df["id"] = df["id"].astype(int)
    df["start"] = df["start"].astype(int)
    df["end"] = df["end"].astype(int)
-    return df
+    return df