Attempt to fuse handcrafted features failed; backing this up temporarily

This commit is contained in:
andrew 2022-10-24 14:57:30 +08:00
parent 8971199f3b
commit e762909975
52 changed files with 8216 additions and 43 deletions

View File

@ -0,0 +1,76 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net014.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/14
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Changed activation function
# Increased respiratory sampling rate
# Input duration (seconds)
WHOLE_SEGMENT_SECOND = 30
# Respiratory sampling rate (Hz)
RESPIRATORY_FRE = 10
# BCG spectrogram size
BCG_GRAPH_SIZE = (26, 121)
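# With 30 s windows sampled at RESPIRATORY_FRE = 10 Hz, each input sequence has 300 samples
# (cf. summary(model, [(32, 300, 1)]) below).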
class HYBRIDNET012(nn.Module):
def __init__(self, num_classes=2, init_weights=True):
super(HYBRIDNET012, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=32,
num_layers=2,
bidirectional=True,
batch_first=True)
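# Bidirectional with hidden_size=32, so each time step outputs 2 * 32 = 64 features;
# the classifier below consumes only the final time step (see forward()).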
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear(64, 8),
nn.GELU(),
nn.Linear(8, num_classes),
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialization
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # normal-distribution initialization
nn.init.constant_(m.bias, 0)
def forward(self, x):
x, (_, _) = self.lstm(x)
# print(x.shape)
x = x[:, -1]
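# Keep only the last time step of the LSTM output: shape (batch, 64).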
x = torch.flatten(x, start_dim=1)
# print(x.shape)
x = self.classifier(x)
return x
if __name__ == '__main__':
model = HYBRIDNET012().cuda()
summary(model, [(32, 300, 1)])

View File

@ -15,7 +15,7 @@ from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Changed activation function
# Increased respiratory sampling rate

View File

@ -21,7 +21,7 @@ from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net017 import HYBRIDNET015
from model.Hybrid_Net017 import HYBRIDNET017
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
from utils.train_set_visual import visual_segment
@ -218,7 +218,7 @@ if __name__ == '__main__':
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, [(32, 300, 1)])
summary(model_net, [(32, 1, 300)])
time.sleep(3)
if gpu:

View File

@ -62,35 +62,10 @@ class BasicBlock_1d(nn.Module):
out = self.relu(out)
return out
class HYBRIDNET017(nn.Module):
def __init__(self, num_classes=2, init_weights=True):
super(HYBRIDNET017, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=8,
num_layers=3,
bidirectional=True,
batch_first=True)
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear(4800, 128),
nn.GELU(),
nn.Linear(128, num_classes),
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialization
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # normal-distribution initialization
nn.init.constant_(m.bias, 0)
class ResNet_1d(nn.Module):
def __init__(self, block, number_block, num_classes=2, init_weights=True):
super(ResNet_1d, self).__init__()
self.in_channel = 64
@ -111,13 +86,15 @@ class HYBRIDNET017(nn.Module):
self.features = nn.Sequential(
# nn.Linear(in_features=1024, out_features=nc),
nn.Flatten(),
nn.Linear(in_features=512 * 23, out_features=512),
nn.Linear(in_features=512 * 2, out_features=512),
nn.Linear(in_features=512, out_features=num_classes)
# nn.Softmax()
# nn.Sigmoid()
)
# self.linear = nn.Linear(512 * block.expansion, num_classes)
if init_weights:
self.initialize_weights()
def _make_layer(self, block, out_channel, num_block, stride):
strides = [stride] + [1] * (num_block - 1)
@ -127,18 +104,36 @@ class HYBRIDNET017(nn.Module):
self.in_channel = out_channel * block.expansion
return nn.Sequential(*layers)
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialization
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # normal-distribution initialization
nn.init.constant_(m.bias, 0)
def forward(self, x):
x, (_, _) = self.lstm(x)
# print(x.shape)
# x = x[:, -1]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.pool1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.pool2(x)
x = x.view(x.size(0), -1)
x = torch.flatten(x, start_dim=1)
# print(x.shape)
x = self.classifier(x)
x = self.features(x)
return x
def HYBRIDNET017():
return ResNet_1d(BasicBlock_1d, [2, 2, 2, 2])
if __name__ == '__main__':
model = HYBRIDNET017().cuda()
summary(model, [(32, 300, 1)])
summary(model, [(32, 1, 300)])

View File

@ -39,7 +39,7 @@ def my_segment_augment(dataset, SP, EP):
# dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::10]
dataset_low = dataset_low.reshape(-1, 1)
dataset_low = dataset_low.reshape(1, -1)
# _, _, dataset_high = stft(dataset_high, 100, nperseg=50)
# dataset_high = dataset_high.astype(np.float).T

View File

@ -10,7 +10,7 @@ Path:
batch_size: 256
number_worker: 0
model_name: HYBRIDNET015
model_name: HYBRIDNET017
select_sampno:
- 88
- 220
@ -60,7 +60,7 @@ select_sampno:
# train hyperparameters config
epoch: 300
lr: 0.01
lr: 0.0001
nc: 1
# wandb config

View File

@ -25,7 +25,7 @@ from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net017 import HYBRIDNET015
from model.Hybrid_Net017 import HYBRIDNET017
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
exam_path = Path("./output/")

View File

@ -0,0 +1,486 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# A01
# Use a 30-second window with a 10-second step; a window is positive if apnea is present
# for more than 5 seconds within its last 10 seconds.
# 14.0
# Manually balance the sample counts.
# 13.0
# Restrict to a subset of the datasets for an initial test.
# 12.0
# Cap segments containing unusable events; unusable segments are treated as background and their events are not recorded.
# 10.0
# Use signals with poor-quality parts removed.
# 9.0
# Add the latest quality labels (not used yet).
# 8.0
# Generate labels for all events except hypopnea.
# A 2-second step was tried; data shorter than 2 seconds gets discarded and causes more false positives,
# though including those leftovers could be considered.
# Use 30-second windows moved by 1 second; label all apneas as 1, hypopnea as 0, normal as 0.
# Preprocessing: a 50 Hz notch filter to remove powerline interference plus a 20 Hz low-pass filter
# (the 20 Hz cutoff depends on the spectral range of the BCG signal).
# First remove extreme values:
# values above the upper baseline or below the lower baseline
# type1  average: 1800  low: 1200  high: 2400
# type2  average: 2400  low: 1800  high: 3000
# Too many such segments shift the mean.
# TODO
# Add body-movement labels and compute the mean excluding body movement.
# Finally downsample to 100 Hz.
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
from pathos import multiprocessing
from tqdm import tqdm
# Dataset and label locations
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
# BCG recording start times
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
# Collect the path of every dataset file
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()
# Save path for the split dataset labels
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)
# Configure logging
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# Header row for the statistics log output
# logger.info("sampNo".center(8) +
# "hpy_num".center(8) + "hpy_time".center(10) +
# "csa_num".center(8) + "csa_time".center(10) +
# "osa_num".center(8) + "osa_time".center(10) +
# "msa_num".center(8) + "msa_time".center(10)
# )
logger.info("sampNo".center(8) + ',' +
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
)
base_random_seed = 42
window_second = 30
step_second = 10
view_apnea_length = 10
apnea_length_threshold = 5
valid_ratio = 0.1
test_ratio = 0.1
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
valid_ratio > 1 and test_ratio > 1), "valid and test splits must both be given as ratios or both as absolute counts"
# dataset sampNo for test
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# Segments that must be zeroed out; the invalid leading section is not always zero-padded and can span tens of thousands of samples
disable_segment = {
'221': [[0, 10000]],
'670': [[0, 20000]],
'683': [[0, 20000]],
'704': [[0, 26000]],
'726': [[0, 20000]],
'736': [[0, 47000]],
'933': [[0, 773560]],
'935': [[0, 26600]],
'952': [[0, 17000]],
'955': [[0, 78000]],
'961': [[0, 107000]],
'962': [[0, 15100]],
'966': [[0, 13120]],
'967': [[0, 44000]],
'1006': [[0, 60000]],
'1009': [[0, 1000]],
'1010': [[0, 49000]],
'1296': [[0, 27000]],
'1300': [[0, 33800]],
'1301': [[0, 14000]],
'1302': [[0, 5600]],
'1374': [[0, 1000]],
'1478': [[0, 998000]],
}
# Main routine for generating the dataset labels
def generate_label(No, dataset_path):
"""
:param dataset_path: 数据集路径
:return:
"""
# Extract the sample number
sampNo = dataset_path.stem.split("samp")[0]
# Label file path
label_path = bcg_label_path / f"export{sampNo}_all.csv"
if not label_path.exists():
raise FileNotFoundError(f"{label_path} not exist")
if not dataset_path.exists():
raise Exception(f"{dataset_path} not exists")
# 加载数据集
select_numpy = np.load(dataset_path)
# 开头不合理片段置零
if sampNo in disable_segment.keys():
for sp, ep in disable_segment[sampNo]:
select_numpy[sp:ep] = 0
# 剔除质量差信号
if sampNo == "670":
select_numpy = select_numpy[:17195 * 100]
# 获取前面补了多少0
not_zero_point = 0
for num in select_numpy:
if num > 10:
break
not_zero_point += 1
not_zero_point //= 100
# Read the labels
label_csv = pd.read_csv(label_path, encoding='gbk')
label_csv["new_label"] = label_csv["new_label"].fillna("2")
label_csv["new_start"] = label_csv["new_start"].astype("int")
label_csv["new_end"] = label_csv["new_end"].astype("int")
label_csv["Duration"] = label_csv["Duration"].astype("int")
label_csv["new_label"] = label_csv["new_label"].astype("int")
# (Optional) drop poor-quality samples
# drop_csv = label_csv[
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
# label_csv = label_csv.drop(drop_csv.index)
# Event segments and background segments; every background segment spans the configured window length
segment_labels = []
negative_labels = []
hpy_num = csa_num = osa_num = msa_num = 0
hpy_time = csa_time = osa_time = msa_time = 0
select_numpy_len = len(select_numpy) // 100
event_list = np.zeros(select_numpy_len)
quality_list = np.zeros(select_numpy_len)
# Iterate over all events and collect statistics
for i in range(len(label_csv)):
# Encode the event type as an integer label
label = label_csv.iloc[i, :]
# Skip events that fall inside the zero-padded region
if label["new_end"] < not_zero_point:
continue
if sampNo == "670" and label["new_start"] > 17195:
continue
if label["new_end"] - label["new_start"] < 10:
print(label.to_numpy())
continue
# Add the event to the event list
if label["Event type"] == "Hypopnea":
label_type = 1
hpy_num += 1
hpy_time += label["new_end"] - label["new_start"]
# Hypopnea is treated as background; it probably does not need to be added to the negative-sample events explicitly?
# negative_labels.append(
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
continue
elif label["Event type"] == "Central apnea":
label_type = 2
csa_num += 1
csa_time += label["new_end"] - label["new_start"]
elif label["Event type"] == "Obstructive apnea":
label_type = 3
osa_num += 1
osa_time += label["new_end"] - label["new_start"]
# Treat MSA (mixed apnea) as OSA
elif label["Event type"] == "Mixed apnea":
label_type = 3
msa_num += 1
msa_time += label["new_end"] - label["new_start"]
else:
continue
# label_type = 0
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
print(sampNo, label.to_numpy())
# Format: sample number, event index, label, start time, end time
event_list[label["new_start"]: label["new_end"]] = label_type
quality_list[label["new_start"]: label["new_end"]] = label["new_label"]
# segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
# logger.info(sampNo.center(8) +
# str(hpy_num).center(8) + str(hpy_time).center(10) +
# str(csa_num).center(8) + str(csa_time).center(10) +
# str(osa_num).center(8) + str(osa_time).center(10) +
# str(msa_num).center(8) + str(msa_time).center(10))
# Set the random seed
random_seed = base_random_seed + int(sampNo)
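# Slide a window of window_second seconds with a stride of step_second seconds.
# A window is labelled positive when the last view_apnea_length seconds contain at least
# apnea_length_threshold seconds of apnea (per-second event label > 1).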
for SP in range(not_zero_point, select_numpy_len - window_second, step_second):
start = SP + window_second - view_apnea_length
end = SP + window_second
i = SP // 30
if sum(event_list[start:end] > 1) >= apnea_length_threshold:  # seconds of apnea within the last view_apnea_length seconds
segment_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second])
else:
negative_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second]
)
# For subjects in the held-out test list, save every window directly
if int(sampNo) in select_dataset:
test_label = []
# All sliding-window segments (events plus background)
test_label = segment_labels + negative_labels
logger.info(sampNo.center(8) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
',' + str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
)
df2.loc[No] = [sampNo,
str(0), str(0), str(0),
str(0), str(0), str(0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(0), str(0), str(len(segment_labels))]
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
# np.array(test_label))
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
index=False)
train_label, valid_label, test_label = [], [], []
# For training/validation subjects
if True:
# Shuffle the segments
np.random.seed(random_seed)
np.random.shuffle(segment_labels)
np.random.shuffle(negative_labels)
# Number of event segments assigned to train / valid / test
if 0 < valid_ratio < 1:
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
valid_segment_num = int(len(segment_labels) * valid_ratio)
else:
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
valid_segment_num = valid_ratio
train_label = segment_labels[:train_segment_num]
valid_label = segment_labels[train_segment_num:train_segment_num + valid_segment_num]
test_label = segment_labels[train_segment_num + valid_segment_num:]
# Segment and event counts (train_segment_num / valid_segment_num are counts, not end indices)
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
train_eve, valid_eve, test_eve = train_segment_num, valid_segment_num, (
len(segment_labels) - train_segment_num - valid_segment_num)
# Dataset compensation
# if train_num < 300:
# train_num = 300 - train_num
#
# if valid_num < 300:
# valid_num = 300 - valid_num
#
# if test_num < 300:
# test_num = 300 - test_num
# Number of background segments assigned to train / valid / test
if 0 < valid_ratio < 1:
train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
valid_eve2 = int(len(negative_labels) * valid_ratio)
else:
train_eve2 = len(negative_labels) - valid_ratio - test_ratio
valid_eve2 = valid_ratio
test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
# # Pad the background events directly up to a fixed count
# train_eve2 = max(train_eve, 300)
# valid_eve2 = max(valid_eve, 40)
# test_eve2 = max(test_eve, 40)
# Force the background counts to equal the event counts
# train_eve2 = train_eve
# valid_eve2 = valid_eve
# test_eve2 = test_eve
# Append the background segments
train_label += negative_labels[:train_eve2]
valid_label += negative_labels[train_eve2: train_eve2 + valid_eve2]
test_label += negative_labels[train_eve2 + valid_eve2:]
logger.info(sampNo.center(8) + ',' +
str(len(train_label)).center(10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(len(valid_label)).center(10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
)
df2.loc[No] = [sampNo.center(8),
str(len(train_label)),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
str(len(valid_label)),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(train_eve), str(valid_eve), str(test_eve).center(10)]
def label_check(label_list):
temp_list = []
for sampNo, index, label_type, new_label, SP, EP in label_list:
if EP - SP < window_second:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if SP < 0:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
for j in temp_list:
label_list.remove(j)
label_check(train_label)
label_check(valid_label)
label_check(test_label)
for sampNo, index, label_type, new_label, SP, EP in train_label:
if EP - SP < window_second:
print(sampNo, index, label_type, new_label, SP, EP)
if SP < 0:
print(sampNo, index, label_type, new_label, SP, EP)
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
df1 = pd.DataFrame(data=train_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
index=False)
df1 = pd.DataFrame(data=valid_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
index=False)
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
# np.array(train_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
# np.array(valid_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
# np.array(test_label))
if __name__ == '__main__':
# pool = multiprocessing.Pool(processes=44)
# pool.map(generate_label, list(all_numpy_dataset))
# pool.close()
# pool.join()
df2 = pd.DataFrame(data=None,
columns=["sampNo",
"train_num", "train_P", "train_N",
"valid_num", "valid_P", "valid_N",
"test_num", "test_P", "test_N",
"train_eve", "valid_eve", "test_eve"])
temp = []
for one_dataset in all_numpy_dataset:
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
temp.append(one_dataset)
# for one_dataset in temp:
# all_numpy_dataset.remove(one_dataset)
for No, one_dataset in enumerate(temp):
generate_label(No, one_dataset)
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
# generate_label(all_numpy_dataset[0])

exam/038/load_dataset.py Normal file (155 lines)
View File

@ -0,0 +1,155 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:load_dataset.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2021/12/03
"""
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.Preprocessing import BCG_Operation
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
"""
1. 读取方法
# 无论是否提前切分均提前转成npy格式
# 1.1 提前预处理切分好后生成npy直接载入切分好的片段 内存占用多 读取简单
使用此方法 1.2 提前预处理载入整夜数据切分好后生成csv或xls根据片段读取 内存占用少 读取较为复杂
"""
datasets = {}
# Module-level cache keyed by sample number, to avoid re-reading whole-night recordings
def read_dataset(data_path, augment=None):
data_path = Path(data_path)
try:
f = []
if data_path.is_dir():
dataset_list = list(data_path.rglob("*.npy"))
dataset_list.sort()
f += dataset_list
elif data_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{data_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {data_path}: {e} \n')
print("loading dataset")
for i in tqdm(f):
select_dataset = np.load(i)
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
if augment is not None:
select_dataset = augment(select_dataset)
datasets[i.name.split("samp")[0]] = select_dataset
# Read using strategy 1.2 described above
class ApneaDataset(Dataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
self.data_path = data_path
self.label_path = label_path
self.segment_augment = segment_augment
self.labels = None
self.dataset_type = dataset_type
self.select_sampNo = select_sampno
# self._getAllData()
self._getAllLabels()
def __getitem__(self, index):
# PN: patient (sample) number
# SP/EP start point, end point
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
def count_SA(self):
return sum(self.labels[:, 2] > 1)  # column 2 is label_type; values above 1 are apnea segments
def __len__(self):
return len(self.labels)
def _getAllLabels(self):
label_path = Path(self.label_path)
if not label_path.exists():
raise Exception(f'{self.label_path} does not exist')
try:
f = []
if label_path.is_dir():
if self.dataset_type == "train":
label_list = list(label_path.rglob("*_train_label.csv"))
elif self.dataset_type == "valid":
label_list = list(label_path.rglob("*_valid_label.csv"))
elif self.dataset_type == "test":
label_list = list(label_path.glob("*_sa_test_label.csv"))
# label_list = list(label_path.rglob("*_test_label.npy"))
elif self.dataset_type == "all_test":
label_list = list(label_path.rglob("*_sa_all_label.csv"))
else:
raise ValueError("self.dataset type error")
# label_list = list(label_path.rglob("*_label.npy"))
label_list.sort()
f += label_list
elif label_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{self.label_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
print("loading labels")
for i in tqdm(f):
if int(i.name.split("_")[0]) not in self.select_sampNo:
continue
if self.labels is None:
self.labels = pd.read_csv(i).to_numpy(dtype=int)
else:
labels = pd.read_csv(i).to_numpy(dtype=int)
if len(labels) > 0:
self.labels = np.concatenate((self.labels, labels))
# self.labels = self.labels[:10000]
print(f"{self.dataset_type} length is {len(self.labels)}")
class TestApneaDataset2(ApneaDataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
super(TestApneaDataset2, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
segment_augment=segment_augment,
select_sampno=select_sampno
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
if __name__ == '__main__':
pass

exam/038/main.py Normal file (298 lines)
View File

@ -0,0 +1,298 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@time:2021/10/15
"""
import os
import yaml
import logging
from pathlib import Path
import time
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torch.cuda
from tqdm import tqdm
from torchinfo import summary
from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net018 import HYBRIDNET018
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
from utils.train_set_visual import visual_segment
# Load configuration
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
os.environ["WANDB_MODE"] = "dryrun"
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# Read path settings
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
save_dir.mkdir(parents=True, exist_ok=True)
# Configure logging
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
fh.setLevel(logging.NOTSET)
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
logging.getLogger('matplotlib.font_manager').disabled = True
logger.info("------------------------------------")
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
# Back up the configuration
with open(save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# Hyper-parameters
gpu = torch.cuda.is_available()
epochs = hyp["epoch"]
lr = hyp["lr"]
nc = hyp["nc"]
bs = hyp["batch_size"]
worker = hyp["number_worker"]
select_sampno = hyp["select_sampno"]
read_dataset(data_path, augment=my_augment)
calc_metrics = CALC_METRICS(nc)
# Training
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
model.train()
train_loss = 0.0
optimizer.zero_grad()
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
pbar.set_description(training_state)
for i, one in pbar:
resp, labels = one[:2]
resp = resp.float().cuda() if gpu else resp.float()
# stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
# Force-normalize the data
# segments = F.normalize(segments)
# print(segments.size())
# Subtract the mean
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# Conv1d convolves over the last dimension, so the CNN input should be [batch_size, embedding size, sequence size],
# i.e. [batch_size, 1, 3000] where 3000 = 30 s * 100 Hz
# segments = segments.view(len(segments), 1, -1)
out = model(resp)
loss = loss_func(out, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# For cosine annealing, pass the fractional epoch:
# scheduler.step(epoch + i / len(train_loader.dataset))
# For adaptive adjustment (ReduceLROnPlateau), pass the loss
scheduler.step(loss)
loss_value = loss.item()
train_loss += loss_value
# cur_lr = optimizer.param_groups[-1]['lr']
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# if i % 20 == 0:
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
cur_lr = optimizer.param_groups[-1]['lr']
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
def model_valid(model, valid_loader, wdir, loss_func):
model.eval()
valid_loss = 0.0
for one in valid_loader:
resp, labels = one[:2]
resp = resp.float().cuda() if gpu else resp.float()
# stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
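# Note: CrossEntropyLoss expects raw logits; the softmax applied above makes this
# validation loss only an approximate indicator, not the training objective.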
valid_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
valid_loss /= len(valid_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
global best_f1
valid_f1 = calc_metrics.metrics[-1].compute()
if valid_f1 > best_f1:
best_f1 = valid_f1
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
torch.save(model.state_dict(), wdir / f"best.pt")
if wandb is not None:
wandb.run.summary["best_f1"] = valid_f1
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for one in test_loader:
resp, labels = one[:2]
resp = resp.float().cuda() if gpu else resp.float()
# stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
import wandb
except ImportError:
wandb = None
prefix = 'wandb: '
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
if wandb is not None and wandb.run is None:
wandb_run = wandb.init(
config=hyp,
name=save_dir.stem,
project=hyp["project"],
notes=hyp["Note"],
tags=hyp["tags"],
entity=hyp["entity"],
)
exam_name = Path("./").absolute().name
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, [(32, 1, 300)])
time.sleep(3)
if gpu:
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
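# Subject-level 5-fold split: kfold.split(select_sampno) partitions sample numbers,
# so no subject appears in both the training and the held-out fold.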
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
train_set = [select_sampno[i] for i in train_ids]
test_set = [select_sampno[i] for i in test_ids]
logger.info(f'Train_Set:{train_set}')
logger.info(f'Independent_Test_Set:{test_set}')
sub_save_dir = save_dir / f"KFold_{fold}"
sub_save_dir.mkdir(exist_ok=True, parents=True)
wdir = sub_save_dir / "weights"
wdir.mkdir(exist_ok=True, parents=True)
hyp["train_set"] = train_set
hyp["test_set"] = test_set
with open(sub_save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# Re-initialize the model for this fold
del model_net
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
if gpu:
model_net.cuda()
logger.info(f"Weight is {[train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]}")
# Loss function and optimizer
loss_function = nn.CrossEntropyLoss(
weight=torch.Tensor([train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]).cuda())
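# Class 0 (non-apnea) is weighted by the positive/negative segment ratio to compensate for class imbalance.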
# loss_func = nn.BCEWithLogitsLoss()
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
# momentum
# nesterov: Nesterov momentum
# weight_decay: L2 regularization
optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
# optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=2836, min_lr=1e-8,
verbose=True)
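# Note: scheduler.step(loss) is called after every batch in model_train, so the patience
# value above is measured in batches rather than epochs.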
# Plot a few training segments as a sanity check
for one_batch in train_loader:
visual_segment(one_batch, sub_save_dir / "sample")
break
del one_batch
# Track the best validation F1
best_f1 = 0
for epoch in range(epochs):
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -0,0 +1,139 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net014.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/14
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Changed activation function
# Increased respiratory sampling rate
# Input duration (seconds)
WHOLE_SEGMENT_SECOND = 30
# Respiratory sampling rate (Hz)
RESPIRATORY_FRE = 10
# BCG spectrogram size
BCG_GRAPH_SIZE = (26, 121)
class BasicBlock_1d(nn.Module):
expansion = 1
def __init__(self, input_channel, output_channel, stride=1):
super(BasicBlock_1d, self).__init__()
self.left = nn.Sequential(
nn.Conv1d(in_channels=input_channel, out_channels=output_channel,
kernel_size=9, stride=stride, padding=4, bias=False),
nn.BatchNorm1d(output_channel),
nn.GELU(),
nn.Conv1d(in_channels=output_channel, out_channels=output_channel,
kernel_size=9, stride=1, padding=4, bias=False),
nn.BatchNorm1d(output_channel)
)
self.right = nn.Sequential()
if stride != 1 or input_channel != self.expansion * output_channel:
self.right = nn.Sequential(
nn.Conv1d(in_channels=input_channel, out_channels=output_channel * self.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm1d(self.expansion * output_channel)
)
self.relu = nn.GELU()
def forward(self, x):
out = self.left(x)
residual = self.right(x)
out += residual
out = self.relu(out)
return out
class ResNet_1d(nn.Module):
def __init__(self, block, number_block, num_classes=2, init_weights=True):
super(ResNet_1d, self).__init__()
self.in_channel = 64
self.conv1 = nn.Conv1d(in_channels=1, out_channels=self.in_channel,
kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm1d(64)
self.relu = nn.GELU()
self.pool1 = nn.AvgPool1d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block=block, out_channel=64, num_block=number_block[0], stride=1)
self.layer2 = self._make_layer(block=block, out_channel=128, num_block=number_block[1], stride=2)
self.layer3 = self._make_layer(block=block, out_channel=256, num_block=number_block[2], stride=2)
self.layer4 = self._make_layer(block=block, out_channel=512, num_block=number_block[3], stride=2)
self.pool2 = nn.MaxPool1d(4)
self.linear = nn.Linear(512, num_classes)
self.features = nn.Sequential(
# nn.Linear(in_features=1024, out_features=nc),
nn.Flatten(),
# nn.Linear(in_features=512 * 2, out_features=512),
nn.Linear(in_features=512 * 2, out_features=num_classes)
# nn.Softmax()
# nn.Sigmoid()
)
# self.linear = nn.Linear(512 * block.expansion, num_classes)
if init_weights:
self.initialize_weights()
def _make_layer(self, block, out_channel, num_block, stride):
strides = [stride] + [1] * (num_block - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channel, out_channel, stride))
self.in_channel = out_channel * block.expansion
return nn.Sequential(*layers)
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialization
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # normal-distribution initialization
nn.init.constant_(m.bias, 0)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.pool1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.pool2(x)
x = x.view(x.size(0), -1)
x = self.features(x)
return x
def HYBRIDNET018():
return ResNet_1d(BasicBlock_1d, [2, 2, 2, 2])
if __name__ == '__main__':
model = HYBRIDNET018().cuda()
summary(model, [(32, 1, 300)])

exam/038/my_augment.py Normal file (54 lines)
View File

@ -0,0 +1,54 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:my_augment.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/07/26
"""
from utils.Preprocessing import BCG_Operation
import numpy as np
from scipy.signal import stft
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
def my_augment(dataset):
dataset -= dataset.mean()
dataset = preprocessing.Iirnotch(dataset)
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.5, order=4)
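# The 0.5 Hz low-pass keeps the respiration-dominated component of the BCG signal (the "low" channel).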
# dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
dataset = {"low": dataset_low}
# "high": dataset_high}
return dataset
def get_stft(x, fs, n):
print(len(x))
f, t, amp = stft(x, fs, nperseg=n)
z = np.abs(amp.copy())
return f, t, z
def my_segment_augment(dataset, SP, EP):
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
# dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::10]
dataset_low = dataset_low.reshape(1, -1)
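# 30 s at 100 Hz downsampled by 10 gives 300 samples, reshaped to (1, 300) as a single Conv1d channel.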
# _, _, dataset_high = stft(dataset_high, 100, nperseg=50)
# dataset_high = dataset_high.astype(np.float).T
# dataset_high = dataset_high.reshape(dataset_high.shape[0], dataset_high.shape[1])
# return dataset_low, dataset_high
return [dataset_low]
if __name__ == '__main__':
pass

exam/038/settings.yaml Normal file (77 lines)
View File

@ -0,0 +1,77 @@
# environment config
GPU: "0"
# dataset config
Path:
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
label: ./dataset/
save: ./output/
batch_size: 256
number_worker: 0
model_name: HYBRIDNET018
select_sampno:
- 88
- 220
- 221
- 229
- 282
- 286
- 541
- 579
- 582
- 670
- 671
- 683
- 703
- 704
- 726
- 735
- 736
- 933
- 935
- 952
- 954
- 955
- 960
- 961
- 962
- 966
- 967
- 969
- 971
- 972
- 1000
- 1004
- 1006
- 1009
- 1010
- 1296
- 1300
- 1301
- 1302
- 1308
- 1354
- 1374
- 1378
- 1478
# train hyperparameters config
epoch: 100
lr: 0.0001
nc: 1
# wandb config
entity: "marques"
project: "Sleep_Apnea_HYBRID00X"
Note: "HYBRID010 RESP "
tags: ["ReduceLROnPlateau", "RESP LSTM"]
# "CW":class_weight
# "CosineAnnealingLR"
# "ReduceLROnPlateau"

View File

@ -0,0 +1,477 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:test_analysis.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/21
"""
import logging
import os
import sys
import pandas as pd
import torch.cuda
import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import tqdm
from pathlib import Path
from torch.nn import functional as F
from torch.utils.data import DataLoader
from load_dataset import TestApneaDataset2, read_dataset
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net018 import HYBRIDNET018
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
exam_path = Path("./output/")
# Confidence (probability) threshold
thresh = 0.5
# Minimum separation between events; shorter gaps are bridged during dilation
thresh_event_interval = 0
# Minimum event length (in segments)
thresh_event_length = 2
# label_type above this value counts as apnea
event_thresh = 1
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
severity_label = {"all": "none"}
severity_df = pd.read_excel(severity_path)
for one_data in severity_df.index:
one_data = severity_df.loc[one_data]
severity_label[str(one_data["数据编号"])] = one_data["程度"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpu = torch.cuda.is_available()
num_classes = 1
calc_metrics = CALC_METRICS(num_classes)
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
read_dataset(data_path, augment=my_augment)
del hyp
# By default, take the most recent output folder
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
train_set, test_set = None, None
loss_func = nn.CrossEntropyLoss()
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
logging.getLogger('matplotlib.font_manager').disabled = True
logging.getLogger('matplotlib.ticker').disabled = True
logger = logging.getLogger()
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
if (exam_path / "test.log").exists():
(exam_path / "test.log").unlink()
fh = logging.FileHandler(exam_path / "test.log", mode='a')
fh.setLevel(logging.INFO)
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
logger.info("------------------------------------")
def set_environment(i):
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
model, model_name, train_set, test_set
output_path = all_output_path[i]
logger.info(output_path)
segments_results_save_path = (output_path / "segments_results")
segments_results_save_path.mkdir(exist_ok=True)
events_results_save_path = (output_path / "events_results")
events_results_save_path.mkdir(exist_ok=True)
# Load configuration
with open(output_path / "settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
train_set = hyp["train_set"]
test_set = hyp["test_set"]
model_path = output_path / "weights" / "best.pt"
model = eval(hyp["model_name"])()
model_name = hyp["model_name"]
model.load_state_dict(torch.load(model_path))
model.cuda()
model.eval()
def test_and_analysis_and_visual(dataset_type):
if dataset_type == "test":
sampNo = train_set
elif dataset_type == "all_test":
sampNo = test_set
else:
sampNo = None
logger.info("出错了")
exam_name = Path("./").absolute().name
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
segment_augment=my_segment_augment)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
test_loss = 0.0
df_segment = pd.DataFrame(columns=columns)
for one in tqdm(test_loader, total=len(test_loader)):
resp, labels = one[:2]
other_info = one[2:]
resp = resp.float().cuda() if gpu else resp.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
out = model(resp)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# one[0] = list(one[0].cpu().numpy())
# one[1] = list(one[1].cpu().numpy())
# one = one[1:]
# out = out.view(1, -1).cpu().numpy().tolist()
# one += out
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
one2 = one2.transpose((1, 0))
df = pd.DataFrame(data=one2, columns=columns)
df_segment = df_segment.append(df, ignore_index=True)
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(f"EXAM_NAME: {exam_name} SampNO: {sampNo}")
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
calc_metrics.reset()
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
# Segment-level analysis
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
# Draw confusion matrices, one per sample
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
# Draw bar charts
# Event-level analysis
# For the inner test every segment index already corresponds to an event;
# for the whole-night independent test, events have to be reconstructed separately.
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
# Drop poor-quality samples
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
df_select_segment = df_segment.drop(df_bad_segment.index)
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
dataset_type)
df_select_event = segment_to_event(df_select_segment, dataset_type)
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
is_event=True)
def analysis_results(df_result, base_path, dataset_type, is_event=False):
if df_result.empty:
logger.info(base_path, dataset_type, "is_empty")
return None
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
all_sampNo = df_result["sampNo"].unique()
df_metrics = pd.DataFrame(columns=columns2)
df_metrics.loc[0] = 0
df_metrics.loc[0]["sampNo"] = dataset_type
for index, sampNo in enumerate(all_sampNo):
df = df_result[df_result["sampNo"] == sampNo]
df.to_csv(
base_path / dataset_type /
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
index=False)
df_metrics.loc[index + 1] = np.NAN
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
df_metrics.loc[index + 1]["support"] = df.shape[0]
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# continue
# else:
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["TP"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["FP"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["TN"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[index + 1]["FN"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
col] != 0 else np.NAN
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
df_metrics.loc[index + 1][col]
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# return None
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
# For the inner test, also aggregate metrics by severity
if dataset_type == "test":
all_severity = ["正常", "轻度", "中度", "重度"]
for index, severity in enumerate(all_severity):
df_event = df_metrics[df_metrics["severity"] == severity]
df_temp = pd.DataFrame(columns=columns2)
df_temp.loc[0] = 0
df_temp.loc[0]["sampNo"] = severity
df_temp.loc[0]["severity"] = str(index + 1)
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
df_temp.loc[0]["T"] += df_event["T"].sum()
df_temp.loc[0]["F"] += df_event["F"].sum()
df_temp.loc[0]["TP"] += df_event["TP"].sum()
df_temp.loc[0]["FP"] += df_event["FP"].sum()
df_temp.loc[0]["TN"] += df_event["TN"].sum()
df_temp.loc[0]["FN"] += df_event["FN"].sum()
df_temp.loc[0]["support"] += df_event["support"].sum()
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
df_metrics = df_metrics.append(df_temp, ignore_index=True)
df_backup = df_metrics
df_metrics = df_metrics.astype("str")
df_metrics = df_metrics.sort_values("severity")
df_metrics.to_csv(base_path / dataset_type /
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
index=False, encoding="gbk")
return df_backup
def confusionMatrix(df_analysis, base_path, dataset_type):
if df_analysis is None:
logger.info(base_path, dataset_type, "is None")
return
if df_analysis.empty:
logger.info(base_path, dataset_type, "is_empty")
return
classes = ["normal", "SA"]
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
for one_samp in df_analysis.index:
one_samp = df_analysis.loc[one_samp]
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
def segment_to_event(df_segment, dataset_type):
df_all_event = pd.DataFrame(columns=columns)
all_sampNo = df_segment["sampNo"].unique()
if dataset_type == "test":
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
all_segments_no = df["segmentNo"].unique()
for index_se, segment_No in enumerate(all_segments_no):
df_temp = df[df["segmentNo"] == segment_No].copy()
SP = df_temp.iloc[0]["EP"]
EP = df_temp.iloc[-1]["EP"] + 1
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
df_temp.iloc[0]["new_label"], SP, EP, 0]
thresh_Pred = df_temp["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# Dilation: extend each positive prediction forward to bridge gaps shorter than thresh_event_interval
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# Run-length encode the dilated predictions to find contiguous runs
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
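# value_ar holds each positive run's length (and 0 for negative runs);
# runs strictly longer than thresh_event_length are kept as detected events.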
for i in value_ar:
if i > thresh_event_length:
df_event.iloc[index_se]["pred"] = 1
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
# encoding="gbk")
df_all_event = df_all_event.append(df_event, ignore_index=True)
else:
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
thresh_Pred = df["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# Dilation: extend each positive prediction forward to bridge gaps shorter than thresh_event_interval
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# Decision: run-length encode thresh_Pred2 into run boundaries, run lengths and positive-run lengths
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for value_index, value in enumerate(value_ar):
SP = index_ar[value_index]
EP = index_ar[value_index] + count_ar[value_index]
# TP, FP
if value > thresh_event_length:
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
label_type = df["label_type"][SP:EP].max()
new_label = df["new_label"][SP:EP].max()
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
SP, EP, thresh_Pred2[SP]]], columns=columns),
ignore_index=True)
# if value > 30:
# logger.info([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
# Run too short: clear these predictions
else:
df["thresh_Pred"][SP:EP] = 0
# Count negative samples (currently disabled)
# for segment_no in df["segmentNo"].unique():
# df_temp = df[df["segmentNo"] == segment_no]
# if df_temp["thresh_Pred"].sum() > 0:
# continue
#
# df_event = df_event.append(pd.DataFrame(
# [[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
# (segment_no + 1) * 30, 0]], columns=columns),
# ignore_index=True)
df_all_event = df_all_event.append(df_event, ignore_index=True)
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
return df_all_event
# Save results per sampNo and visualise them without overlap
# inner_test
# Per sampNo, additionally save the segments that disagree with the labels and visualise them without overlap
# import shap
# explainer = shap.TreeExplainer()
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:Draw_ConfusionMatrix.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/08/10
"""
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
ax.set_ylim(len(classes) - 0.5, -0.5)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
normalize = False
fmt = '.2f' if normalize else 'd'
thresh = cm.max() * 0.8
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig_cm.tight_layout()
fig_cm.savefig(save_path)
plt.close()
#
if __name__ == '__main__':
pass
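# Minimal usage sketch (the numbers are made up and the working directory is assumed writable):
demo_cm = np.array([[50, 5],
[8, 37]])
draw_confusionMatrix(demo_cm, classes=["normal", "SA"], title="demo sample",
save_path="demo_confusionMatrix.jpg")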

View File

@ -0,0 +1,181 @@
# encoding:utf-8
"""
@ date: 2020-09-16
@ author: jingxian
@ illustration: Pre-processing
"""
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from scipy import fftpack
def Dilate(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
return returndata
def Eorde(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
return returndata
def fin_turn(data, peak):
if len(data) == 0 or len(peak) == 0: return peak
return_peak = []
for p in peak:
minx, maxx = max(0, p - 100), min(len(data), p + 100)
return_peak.append(minx + np.argmax(data[minx: maxx]))
return return_peak
class BCG_Operation():
def __init__(self, sample_rate=1000):
self.sample_rate = sample_rate
def down_sample(self, data=None, down_radio=10):
if data is None:
raise ValueError("data is None, please given an real value!")
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
self.sample_rate = self.sample_rate / down_radio
return data
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
"""
Split the signal into windows
:param len_win: length of window
:return: signal windows
"""
if (len_win is None) or (data is None):
raise ValueError("length of window or data is None, please given an real value!")
else:
length = len_win * self.sample_rate # number point of a window
# step of split windows
step = length * coverage
start = 0
Splitdata = []
while (len(data) - start >= length):
Splitdata.append(data[int(start):int(start + length)])
start += step
if calculate_to_end and (len(data) - start > 2000):
remain = len(data) - start
start = start - step
step = int(remain / 2000)
start = start + step * 2000
Splitdata.append(data[int(start):int(start + length)])
return np.array(Splitdata), step
elif calculate_to_end:
return np.array(Splitdata), 0
else:
return np.array(Splitdata)
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
"""
:param type: Type of Butter. filter, lowpass, bandpass, ...
:param lowcut: Low cutoff frequency
:param highcut: High cutoff frequency
:param order: Order of filter
:return: Signal after filtering
"""
if type == "lowpass": # 低通滤波处理
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "bandpass": # 带通滤波处理
low = low_cut / (self.sample_rate * 0.5)
high = high_cut / (self.sample_rate * 0.5)
b, a = signal.butter(order, [low, high], btype='bandpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "highpass": # 高通滤波处理
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
return signal.filtfilt(b, a, np.array(data))
else: # 警告,滤波器类型必须有
raise ValueError("Please choose a type of fliter")
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
"""
:param data: Input signal
:param M: Length of structural element
:return: Signal after filter
"""
if not data.any():
raise ValueError("The input data is None, please given real value data")
g = np.ones(M)
Data_pre = np.insert(data, 0, np.zeros(M))
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
# Opening: erosion then dilation
out1 = Eorde(Data_pre, len(Data_pre), g, M)
out2 = Dilate(out1, len(out1), g, M)
out2 = np.insert(out2, 0, np.zeros(M - 2))
# Closing: dilation then erosion
out5 = Dilate(Data_pre, len(Data_pre), g, M)
out6 = Eorde(out5, len(out5), g, M)
out6 = np.insert(out6, 0, np.zeros(M - 2))
baseline = (out2 + out6) / 2
# ---- subtract the estimated baseline and keep the residual ----
data_filtered = Data_pre[:len(baseline)] - baseline
data_filtered = data_filtered[M: M + len(data)]
baseline = baseline[M:]
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
baseline[-1] = baseline[-2] = baseline[-3]
if get_bre:
return data_filtered, baseline
else:
return data_filtered
def Iirnotch(self, data=None, cut_fre=50, quality=3):
"""陷波器"""
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
return signal.filtfilt(b, a, np.array(data))
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
"""
Chebyshev type-I filter
:param data: Input signal
:param rp: The maximum ripple allowed
:param type: 'lowpass', 'bandpass, 'highpass'
:param low_cut: Low cut-off fre
:param high_cut: High cut-off fre
:param order: The order of filter
:return: Signal after filter
"""
if type == 'lowpass':
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'bandpass':
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'highpass':
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
else:
raise ValueError("The type of filter is None, please given the real value!")
def Envelope(self, data):
"""取信号包络"""
if len(data) <= 1: raise ValueError("Wrong input data")
hx = fftpack.hilbert(data)
return np.sqrt(hx ** 2, data ** 2)
def wavelet_trans(self, data,c_level=['aaa','aad'], wavelet='db4', mode='symmetric',maxlevel=10):
wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
for c in c_level :
new_wp[c] = wp[c]
return new_wp.reconstruct()
# def em_decomposition(self, data):
# from pyhht.emd import EMD
# return EMD(data).decompose()
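if __name__ == '__main__':
# Minimal usage sketch on synthetic data (not part of the original pipeline): a 1000 Hz
# test signal is downsampled to 100 Hz and band-pass filtered with the methods above.
t = np.linspace(0, 10, 10 * 1000, endpoint=False)
demo = np.sin(2 * np.pi * 1.2 * t) + 0.1 * np.random.randn(t.size)
op = BCG_Operation(sample_rate=1000)
demo_100hz = op.down_sample(demo, down_radio=10)
demo_filtered = op.Butterworth(demo_100hz, "bandpass", low_cut=0.1, high_cut=20, order=4)
print(demo_100hz.shape, demo_filtered.shape)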

View File

@ -0,0 +1,84 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:calc_metrics.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/12
"""
import torch
import torchmetrics
class CALC_METRICS:
metrics = []
nc = 0
def __init__(self, nc):
self.nc = nc
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
self.valid_result = self.train_result = None
def update(self, pred, target):
for part1 in self.metrics:
part1.update(pred.cpu(), target.cpu())
def compute(self):
result = []
for part1 in self.metrics:
result.append(part1.compute())
def reset(self):
for part1 in self.metrics:
part1.reset()
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
temp_result = []
for j in self.metrics:
compute_result = (j.compute().cpu().numpy() * 100).tolist()
temp_result.append(compute_result)
if epoch_type == "train":
self.train_result = [loss] + temp_result
elif epoch_type == "valid":
self.valid_result = [loss] + temp_result
else:
pass
a = ""
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
return a
def wandb_log(self, wandb=None, cur_lr=None):
if wandb is None:
return
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
dict_key = []
for epoch_type in ["train", "valid"]:
dict_key.append(epoch_type + "/" + "loss")
for i in keyword:
dict_key.append(epoch_type + "/" + i)
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
log_dict["lr"] = cur_lr
wandb.log(log_dict)
if __name__ == '__main__':
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
pred = torch.tensor(pred).cuda()
true = torch.tensor(true).cuda()
calc_metrics = CALC_METRICS(1)
calc_metrics.update(pred, true)
print(calc_metrics.get_matrix())
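# Fuller call sketch (loss / lr / epoch values are made up): passing epoch_type fills
# train_result or valid_result, which wandb_log() later combines into one log entry.
print(calc_metrics.get_matrix(loss=0.35, cur_lr=1e-4, epoch=0, epoch_type="train"))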

View File

@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:train_set_visual.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/15
@description: 检查数据集是否按照预期处理
"""
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
fig = plt.figure(figsize=(10, 2), dpi=200)
plt.tight_layout()
def visual_segment(one, save_dir):
save_dir.mkdir(exist_ok=True, parents=True)
one = [two.cpu().numpy() for two in one]
segment, label_type, PN, segmentNo, label_type, new_label, SP, EP = one
print("drawing sample")
for i in tqdm(range(len(one[0]) if len(one[0]) < 128 else 128)):
plt.clf()
plt.plot(np.linspace(SP[i], EP[i], len(segment[i][0])), segment[i][0])
plt.title(f"sampNo:{PN[i]} label_type:{label_type[i]} {SP[i]}:{EP[i]}")
plt.xlabel("时间 秒")
plt.ylabel("心晓信号幅值")
plt.tight_layout()
plt.savefig(save_dir / f"sampNo {PN[i]} {SP[i]} {EP[i]} label_type {label_type[i]}")
if __name__ == '__main__':
pass
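# Synthetic-batch sketch (illustrative only): visual_segment() unpacks eight tensors and
# expects segment[i][0] to be the waveform, i.e. a (B, 1, L) segment tensor. Note that the
# current DataLoader in main.py yields nine items (resp, feature, ...), so this sketch only
# demonstrates the shapes this function itself assumes.
import torch
from pathlib import Path
B, L = 4, 300
fake_batch = [torch.randn(B, 1, L), torch.zeros(B), torch.arange(B), torch.arange(B),
torch.zeros(B), torch.zeros(B), torch.zeros(B), torch.full((B,), 30)]
visual_segment(fake_batch, Path("./debug_sample"))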

View File

@ -0,0 +1,486 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# A01
# Use 30-second windows with a 10-second step; a window is positive if the last 10 seconds contain more than 5 seconds of apnea.
# 14.0
# Manually balance the class counts.
# 13.0
# Restrict to a subset of the recordings for a first test.
# 12.0
# Cap segments containing unusable events at the upper bound; treat unusable segments as background and do not record their events.
# 10.0
# Use the signals with poor-quality sections removed.
# 9.0
# Add the latest quality labels (not used yet).
# 8.0
# Generate labels for all events except hypopnea.
# A 2-second step was tried: windows shorter than two seconds get discarded, causing more misclassifications, but including them could be considered.
# Use 30-second windows moved 1 second at a time; label all apneas as 1, hypopnea as 0, normal as 0.
# Preprocessing: a 50 Hz notch filter for mains interference plus a 20 Hz low-pass filter; the 20 Hz cutoff depends on the BCG signal's spectrum.
# First remove extreme values:
# samples above the upper baseline or below the lower baseline.
# type1 average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000
# Too many such segments shift the average.
# TODO
# Add body-movement labels and compute the average excluding body movement.
# Finally downsample to 100 Hz.
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
from pathos import multiprocessing
from tqdm import tqdm
# 数据集 和 标签 位置
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
# BCG 记录开始时间
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
# 读取每个数据集路径
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()
# 划分后的数据集保存路径
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)
# 设置日志
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# 输出统计数据标题栏
# logger.info("sampNo".center(8) +
# "hpy_num".center(8) + "hpy_time".center(10) +
# "csa_num".center(8) + "csa_time".center(10) +
# "osa_num".center(8) + "osa_time".center(10) +
# "msa_num".center(8) + "msa_time".center(10)
# )
logger.info("sampNo".center(8) + ',' +
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
)
base_random_seed = 42
window_second = 30
step_second = 10
view_apnea_length = 10
apnea_length_threshold = 5
valid_ratio = 0.1
test_ratio = 0.1
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
valid_ratio > 1 and test_ratio > 1), "valid and test splits must both be ratios in (0, 1) or both be absolute counts"
# dataset sampNo for test
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# Segments that must be zeroed out; the leading padding is not always zeros and can reach tens of thousands of samples
disable_segment = {
'221': [[0, 10000]],
'670': [[0, 20000]],
'683': [[0, 20000]],
'704': [[0, 26000]],
'726': [[0, 20000]],
'736': [[0, 47000]],
'933': [[0, 773560]],
'935': [[0, 26600]],
'952': [[0, 17000]],
'955': [[0, 78000]],
'961': [[0, 107000]],
'962': [[0, 15100]],
'966': [[0, 13120]],
'967': [[0, 44000]],
'1006': [[0, 60000]],
'1009': [[0, 1000]],
'1010': [[0, 49000]],
'1296': [[0, 27000]],
'1300': [[0, 33800]],
'1301': [[0, 14000]],
'1302': [[0, 5600]],
'1374': [[0, 1000]],
'1478': [[0, 998000]],
}
# 生成数据集主函数
def generate_label(No, dataset_path):
"""
:param dataset_path: 数据集路径
:return:
"""
# 获取数据编号
sampNo = dataset_path.stem.split("samp")[0]
# 标签路径
label_path = bcg_label_path / f"export{sampNo}_all.csv"
if not label_path.exists():
raise FileNotFoundError(f"{label_path} not exist")
if not dataset_path.exists():
raise Exception(f"{dataset_path} not exists")
# 加载数据集
select_numpy = np.load(dataset_path)
# 开头不合理片段置零
if sampNo in disable_segment.keys():
for sp, ep in disable_segment[sampNo]:
select_numpy[sp:ep] = 0
# 剔除质量差信号
if sampNo == "670":
select_numpy = select_numpy[:17195 * 100]
# 获取前面补了多少0
not_zero_point = 0
for num in select_numpy:
if num > 10:
break
not_zero_point += 1
not_zero_point //= 100
# 读取标签
label_csv = pd.read_csv(label_path, encoding='gbk')
label_csv["new_label"] = label_csv["new_label"].fillna("2")
label_csv["new_start"] = label_csv["new_start"].astype("int")
label_csv["new_end"] = label_csv["new_end"].astype("int")
label_csv["Duration"] = label_csv["Duration"].astype("int")
label_csv["new_label"] = label_csv["new_label"].astype("int")
# 剔除质量不好的样本
# drop_csv = label_csv[
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
# label_csv = label_csv.drop(drop_csv.index)
# Event segments vs. background segments; every background segment has the configured window length
segment_labels = []
negative_labels = []
hpy_num = csa_num = osa_num = msa_num = 0
hpy_time = csa_time = osa_time = msa_time = 0
select_numpy_len = len(select_numpy) // 100
event_list = np.zeros(select_numpy_len)
quality_list = np.zeros(select_numpy_len)
# 遍历全部事件并统计
for i in range(len(label_csv)):
# 进行LabelEncoder
label = label_csv.iloc[i, :]
# 如果事件在补零片段,则不添加到事件列表
if label["new_end"] < not_zero_point:
continue
if sampNo == "670" and label["new_start"] > 17195:
continue
if label["new_end"] - label["new_start"] < 10:
print(label.to_numpy())
continue
# 将事件添加到事件列表
if label["Event type"] == "Hypopnea":
label_type = 1
hpy_num += 1
hpy_time += label["new_end"] - label["new_start"]
# Hypopnea is treated as background; it probably does not need to be added to the negative-event list explicitly?
# negative_labels.append(
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
continue
elif label["Event type"] == "Central apnea":
label_type = 2
csa_num += 1
csa_time += label["new_end"] - label["new_start"]
elif label["Event type"] == "Obstructive apnea":
label_type = 3
osa_num += 1
osa_time += label["new_end"] - label["new_start"]
# Mixed apnea is treated as OSA
elif label["Event type"] == "Mixed apnea":
label_type = 3
msa_num += 1
msa_time += label["new_end"] - label["new_start"]
else:
continue
# label_type = 0
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
print(sampNo, label.to_numpy())
# Row format: sample number, event index, label, start second, end second
event_list[label["new_start"]: label["new_end"]] = label_type
quality_list[label["new_start"]: label["new_end"]] = label["new_label"]
# segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
# logger.info(sampNo.center(8) +
# str(hpy_num).center(8) + str(hpy_time).center(10) +
# str(csa_num).center(8) + str(csa_time).center(10) +
# str(osa_num).center(8) + str(osa_time).center(10) +
# str(msa_num).center(8) + str(msa_time).center(10))
# Seed the random number generator per sample
random_seed = base_random_seed + int(sampNo)
# Slide a 30 s window in 10 s steps; a window is positive when the last view_apnea_length
# seconds contain at least apnea_length_threshold seconds of apnea (event_list is per-second;
# the original indexed the raw 100 Hz signal here by mistake).
for SP in range(not_zero_point, select_numpy_len - window_second, step_second):
start = SP + window_second - view_apnea_length
end = SP + window_second
i = SP // 30
if np.sum(event_list[start:end] > 1) >= apnea_length_threshold:
segment_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second])
else:
negative_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second]
)
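# Worked example of the rule above (illustrative numbers): with window_second=30,
# step_second=10 and view_apnea_length=10, the window starting at SP=120 covers seconds
# 120-150 and is positive only if seconds 140-150 contain at least apnea_length_threshold=5
# seconds with event_list > 1 (CSA/OSA/MSA); otherwise it is stored as a background segment.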
# For held-out test recordings, save every window directly
if int(sampNo) in select_dataset:
test_label = []
# 分成指定窗长的滑窗片段
test_label = segment_labels + negative_labels
logger.info(sampNo.center(8) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
',' + str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
)
df2.loc[No] = [sampNo,
str(0), str(0), str(0),
str(0), str(0), str(0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(0), str(0), str(len(segment_labels))]
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
# np.array(test_label))
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
index=False)
train_label, valid_label, test_label = [], [], []
# 对于训练与验证集样本
if True:
# 打乱片段顺序
np.random.seed(random_seed)
np.random.shuffle(segment_labels)
np.random.shuffle(negative_labels)
# Number of positive segments assigned to train / valid; test takes the remainder
if 0 < valid_ratio < 1:
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
# count of validation segments (the original stored a boundary index here, which over-sliced below)
valid_segment_num = int(len(segment_labels) * valid_ratio)
else:
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
valid_segment_num = valid_ratio
train_label = segment_labels[:train_segment_num]
valid_label = segment_labels[train_segment_num:train_segment_num + valid_segment_num]
test_label = segment_labels[train_segment_num + valid_segment_num:]
# Segment and event counts per split
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
train_eve, valid_eve, test_eve = train_segment_num, valid_segment_num, (
len(segment_labels) - train_segment_num - valid_segment_num)
# 数据集补偿
# if train_num < 300:
# train_num = 300 - train_num
#
# if valid_num < 300:
# valid_num = 300 - valid_num
#
# if test_num < 300:
# test_num = 300 - test_num
# Number of background (negative) segments assigned to train / valid / test
if 0 < valid_ratio < 1:
train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
valid_eve2 = int(len(negative_labels) * valid_ratio)
else:
train_eve2 = len(negative_labels) - valid_ratio - test_ratio
valid_eve2 = valid_ratio
test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
# # 直接补充到足够个数的背景事件
# train_eve2 = max(train_eve, 300)
# valid_eve2 = max(valid_eve, 40)
# test_eve2 = max(test_eve, 40)
# 强制背景数量
# train_eve2 = train_eve
# valid_eve2 = valid_eve
# test_eve2 = test_eve
# 添加背景事件数量
train_label += negative_labels[:train_eve2]
valid_label += negative_labels[train_eve2: train_eve2 + valid_eve2]
test_label += negative_labels[train_eve2 + valid_eve2:]
logger.info(sampNo.center(8) + ',' +
str(len(train_label)).center(10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(len(valid_label)).center(10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
)
df2.loc[No] = [sampNo.center(8),
str(len(train_label)),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
str(len(valid_label)),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(train_eve), str(valid_eve), str(test_eve).center(10)]
def label_check(label_list):
temp_list = []
for sampNo, index, label_type, new_label, SP, EP in label_list:
if EP - SP < window_second:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if SP < 0:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
for j in temp_list:
label_list.remove(j)
label_check(train_label)
label_check(valid_label)
label_check(test_label)
for sampNo, index, label_type, new_label, SP, EP in train_label:
if EP - SP < window_second:
print(sampNo, index, label_type, new_label, SP, EP)
if SP < 0:
print(sampNo, index, label_type, new_label, SP, EP)
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
df1 = pd.DataFrame(data=train_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
index=False)
df1 = pd.DataFrame(data=valid_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
index=False)
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
# np.array(train_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
# np.array(valid_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
# np.array(test_label))
if __name__ == '__main__':
# pool = multiprocessing.Pool(processes=44)
# pool.map(generate_label, list(all_numpy_dataset))
# pool.close()
# pool.join()
df2 = pd.DataFrame(data=None,
columns=["sampNo",
"train_num", "train_P", "train_N",
"valid_num", "valid_P", "valid_N",
"test_num", "test_P", "test_N",
"train_eve", "valid_eve", "test_eve"])
temp = []
for one_dataset in all_numpy_dataset:
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
temp.append(one_dataset)
# for one_dataset in temp:
# all_numpy_dataset.remove(one_dataset)
for No, one_dataset in enumerate(temp):
generate_label(No, one_dataset)
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
# generate_label(all_numpy_dataset[0])

155
exam/041/load_dataset.py Normal file
View File

@ -0,0 +1,155 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:load_dataset.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2021/12/03
"""
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.Preprocessing import BCG_Operation
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
"""
1. 读取方法
# 无论是否提前切分均提前转成npy格式
# 1.1 提前预处理切分好后生成npy直接载入切分好的片段 内存占用多 读取简单
使用此方法 1.2 提前预处理载入整夜数据切分好后生成csv或xls根据片段读取 内存占用少 读取较为复杂
"""
datasets = {}
# Cache whole-night recordings so each file is read only once
def read_dataset(data_path, augment=None):
data_path = Path(data_path)
try:
f = []
if data_path.is_dir():
dataset_list = list(data_path.rglob("*.npy"))
dataset_list.sort()
f += dataset_list
elif data_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{data_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {data_path}: {e} \n')
print("loading dataset")
for i in tqdm(f):
select_dataset = np.load(i)
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
if augment is not None:
select_dataset = augment(select_dataset)
datasets[i.name.split("samp")[0]] = select_dataset
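# Usage sketch (the path is a placeholder): read_dataset() fills the module-level `datasets`
# dict keyed by sample number, which ApneaDataset.__getitem__ later indexes via str(PN).
# read_dataset("/path/to/BCG_100hz_lowpass50/")
# first_30s = datasets["670"][:30 * 100]   # 30 s of the 100 Hz recording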
# Segments are read with strategy 1.2 (see the module docstring)
class ApneaDataset(Dataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
self.data_path = data_path
self.label_path = label_path
self.segment_augment = segment_augment
self.labels = None
self.dataset_type = dataset_type
self.select_sampNo = select_sampno
# self._getAllData()
self._getAllLabels()
def __getitem__(self, index):
# PN: patient (sample) number
# SP/EP start point, end point
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
def count_SA(self):
# label_type is column 2 of the label rows; values > 1 (CSA/OSA/MSA) are sleep-apnea positives
return sum(self.labels[:, 2] > 1)
def __len__(self):
return len(self.labels)
def _getAllLabels(self):
label_path = Path(self.label_path)
if not label_path.exists():
raise Exception(f'{self.label_path} does not exist')
try:
f = []
if label_path.is_dir():
if self.dataset_type == "train":
label_list = list(label_path.rglob("*_train_label.csv"))
elif self.dataset_type == "valid":
label_list = list(label_path.rglob("*_valid_label.csv"))
elif self.dataset_type == "test":
label_list = list(label_path.glob("*_sa_test_label.csv"))
# label_list = list(label_path.rglob("*_test_label.npy"))
elif self.dataset_type == "all_test":
label_list = list(label_path.rglob("*_sa_all_label.csv"))
else:
raise ValueError("self.dataset type error")
# label_list = list(label_path.rglob("*_label.npy"))
label_list.sort()
f += label_list
elif label_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{self.label_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
print("loading labels")
for i in tqdm(f):
if int(i.name.split("_")[0]) not in self.select_sampNo:
continue
if self.labels is None:
self.labels = pd.read_csv(i).to_numpy(dtype=int)
else:
labels = pd.read_csv(i).to_numpy(dtype=int)
if len(labels) > 0:
self.labels = np.concatenate((self.labels, labels))
# self.labels = self.labels[:10000]
print(f"{self.dataset_type} length is {len(self.labels)}")
class TestApneaDataset2(ApneaDataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
super(TestApneaDataset2, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
segment_augment=segment_augment,
select_sampno=select_sampno
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
if __name__ == '__main__':
pass
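# Construction sketch (paths, sample numbers and the augment functions are placeholders
# borrowed from my_augment.py; nothing here is executed by the pipeline):
# from torch.utils.data import DataLoader
# from my_augment import my_augment, my_segment_augment
# read_dataset("/path/to/BCG_100hz_lowpass50/", augment=my_augment)
# ds = ApneaDataset("/path/to/BCG_100hz_lowpass50/", "./dataset/", [670], "train", my_segment_augment)
# dl = DataLoader(ds, batch_size=32, shuffle=True)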

298
exam/041/main.py Normal file
View File

@ -0,0 +1,298 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@time:2021/10/15
"""
import os
import yaml
import logging
from pathlib import Path
import time
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torch.cuda
from tqdm import tqdm
from torchinfo import summary
from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net021 import HYBRIDNET021
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
from utils.train_set_visual import visual_segment
# 加载配置
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
os.environ["WANDB_MODE"] = "dryrun"
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# 读取地址参数
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
save_dir.mkdir(parents=True, exist_ok=True)
# 设置日志
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
fh.setLevel(logging.NOTSET)
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
logging.getLogger('matplotlib.font_manager').disabled = True
logger.info("------------------------------------")
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
# 备份配置
with open(save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# Hyper-parameters
gpu = torch.cuda.is_available()
epochs = hyp["epoch"]
lr = hyp["lr"]
nc = hyp["nc"]
bs = hyp["batch_size"]
worker = hyp["number_worker"]
select_sampno = hyp["select_sampno"]
read_dataset(data_path, augment=my_augment)
calc_metrics = CALC_METRICS(nc)
# 开始训练
# 训练
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
model.train()
train_loss = 0.0
optimizer.zero_grad()
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
pbar.set_description(training_state)
for i, one in pbar:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
# Optional: force-normalise the input
# segments = F.normalize(segments)
# print(segments.size())
# Optional: subtract the per-segment mean
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# Conv1d convolves over the last dimension, so a CNN front-end expects [batch_size, embedding_size, sequence_size],
# i.e. [batch_size, 1, 3000] for 30 s at 100 Hz; the LSTM here takes batch_first input of shape [batch_size, 300, 1] instead.
# segments = segments.view(len(segments), 1, -1)
out = model(resp)
loss = loss_func(out, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# CosineAnnealingLR would be stepped with the fractional epoch:
# scheduler.step(epoch + i / len(train_loader.dataset))
# ReduceLROnPlateau is stepped with the batch loss
scheduler.step(loss)
loss_value = loss.item()
train_loss += loss_value
# cur_lr = optimizer.param_groups[-1]['lr']
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# if i % 20 == 0:
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
cur_lr = optimizer.param_groups[-1]['lr']
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
def model_valid(model, valid_loader, wdir, loss_func):
model.eval()
valid_loss = 0.0
for one in valid_loader:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
valid_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
valid_loss /= len(valid_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
global best_f1
valid_f1 = calc_metrics.metrics[-1].compute()
if valid_f1 > best_f1:
best_f1 = valid_f1
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
torch.save(model.state_dict(), wdir / f"best.pt")
if wandb is not None:
wandb.run.summary["best_f1"] = valid_f1
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for one in test_loader:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
import wandb
except ImportError:
wandb = None
prefix = 'wandb: '
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
if wandb is not None and wandb.run is None:
wandb_run = wandb.init(
config=hyp,
name=save_dir.stem,
project=hyp["project"],
notes=hyp["Note"],
tags=hyp["tags"],
entity=hyp["entity"],
)
exam_name = Path("./").absolute().name
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, (32, 300, 1))
time.sleep(3)
if gpu:
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
train_set = [select_sampno[i] for i in train_ids]
test_set = [select_sampno[i] for i in test_ids]
logger.info(f'Train_Set:{train_set}')
logger.info(f'Independent_Test_Set:{test_set}')
sub_save_dir = save_dir / f"KFold_{fold}"
sub_save_dir.mkdir(exist_ok=True, parents=True)
wdir = sub_save_dir / "weights"
wdir.mkdir(exist_ok=True, parents=True)
hyp["train_set"] = train_set
hyp["test_set"] = test_set
with open(sub_save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# 重新初始化模型
del model_net
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
if gpu:
model_net.cuda()
logger.info(f"Weight is {[train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]}")
# 损失函数与优化器
loss_function = nn.CrossEntropyLoss(
weight=torch.Tensor([train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]).cuda())
# loss_func = nn.BCEWithLogitsLoss()
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
# momentum
# nesterov 牛顿动量
# weight_decay L2正则
# optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=2836, min_lr=1e-8,
verbose=True)
# 整点图看看
for one_batch in train_loader:
visual_segment(one_batch, sub_save_dir / "sample")
break
del one_batch
# 参数记录
best_f1 = 0
for epoch in range(epochs):
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -0,0 +1,79 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net014.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/14
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# 修改激活函数
# 提高呼吸采样率
# 输入时长
WHOLE_SEGMENT_SECOND = 30
# 呼吸采样率
RESPIRATORY_FRE = 10
# BCG 时频图大小
BCG_GRAPH_SIZE = (26, 121)
class HYBRIDNET021(nn.Module):
def __init__(self, num_classes=2, init_weights=True):
super(HYBRIDNET021, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=32,
num_layers=2,
bidirectional=True,
batch_first=True)
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear(64, 8),
nn.GELU(),
nn.Linear(8, num_classes),
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') # 何教授方法
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01) # 正态分布赋值
nn.init.constant_(m.bias, 0)
def forward(self, x1):
x1, (_, _) = self.lstm(x1)
# print(x1.shape)
x1 = x1[:, -1]
x1 = torch.flatten(x1, start_dim=1)
# print(x1.shape)
# x2 = x2.squeeze()
# x = torch.cat((x1, x2), dim=1)
x = x1
x = self.classifier(x)
return x
if __name__ == '__main__':
model = HYBRIDNET021().cuda()
summary(model, [(32, 300, 1)])  # forward() takes only the respiration input, so a single input size
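# Output sketch (shapes inferred from the layers above): the bidirectional LSTM returns
# (batch, 300, 64); taking the last time step and flattening gives (batch, 64), and the
# classifier maps that to (batch, num_classes) logits, e.g.
# out = model(torch.randn(32, 300, 1).cuda())   # -> torch.Size([32, 2])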

79
exam/041/my_augment.py Normal file
View File

@ -0,0 +1,79 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:my_augment.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/07/26
"""
from utils.Preprocessing import BCG_Operation
import numpy as np
from scipy.signal import stft
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
def my_augment(dataset):
dataset -= dataset.mean()
dataset = preprocessing.Iirnotch(dataset)
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.5, order=4)
# dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
dataset = {"low": dataset_low}
# "high": dataset_high}
return dataset
def get_stft(x, fs, n):
print(len(x))
f, t, amp = stft(x, fs, nperseg=n)
z = np.abs(amp.copy())
return f, t, z
def my_segment_augment(dataset, SP, EP):
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
# dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::10]
# Hand-crafted features: whole segment (3x1), per 10 s window (3x3), per 10 s window with 2 s stride (3x21)
# dataset_low is at 10 Hz after the ::10 downsample, so the whole 30 s segment is 300 samples
# (the original set sub_windows_size = 30 samples, i.e. only the first 3 s, which does not match the whole-segment intent).
sub_windows_size = 30 * 10
stride = 1
manual_feature = [[], [], []]
SP = 0
EP = sub_windows_size
while EP <= len(dataset_low):
# mean
manual_feature[0].append(dataset_low[SP:EP].mean())
# var
manual_feature[1].append(dataset_low[SP:EP].var())
# RMS
manual_feature[2].append(np.sqrt((dataset_low[SP:EP] ** 2).mean()))
SP += stride
EP += stride
dataset_low = dataset_low.reshape(-1, 1)
manual_feature = np.array(manual_feature)
manual_feature = manual_feature.reshape(-1, 1)
# _, _, dataset_high = stft(dataset_high, 100, nperseg=50)
# dataset_high = dataset_high.astype(np.float).T
# dataset_high = dataset_high.reshape(dataset_high.shape[0], dataset_high.shape[1])
# return dataset_low, dataset_high
return dataset_low, manual_feature
if __name__ == '__main__':
pass
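# Shape-check sketch on synthetic data (CPU only, values are random):
fake_night = np.random.randn(60 * 100)   # 60 s of a 100 Hz signal
prepared = my_augment(fake_night)
resp, feats = my_segment_augment(prepared, SP=0, EP=30)
print(resp.shape, feats.shape)   # with the whole-segment windowing above: (300, 1) and (3, 1)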

77
exam/041/settings.yaml Normal file
View File

@ -0,0 +1,77 @@
# environment config
GPU: "0"
# dataset config
Path:
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
label: ./dataset/
save: ./output/
batch_size: 256
number_worker: 0
model_name: HYBRIDNET021
select_sampno:
- 88
- 220
- 221
- 229
- 282
- 286
- 541
- 579
- 582
- 670
- 671
- 683
- 703
- 704
- 726
- 735
- 736
- 933
- 935
- 952
- 954
- 955
- 960
- 961
- 962
- 966
- 967
- 969
- 971
- 972
- 1000
- 1004
- 1006
- 1009
- 1010
- 1296
- 1300
- 1301
- 1302
- 1308
- 1354
- 1374
- 1378
- 1478
# train hyperparameters config
epoch: 100
lr: 0.0001
nc: 1
# wandb config
entity: "marques"
project: "Sleep_Apnea_HYBRID00X"
Note: "HYBRID010 RESP "
tags: ["ReduceLROnPlateau", "RESP LSTM"]
# "CW":class_weight
# "CosineAnnealingLR"
# "ReduceLROnPlateau"

View File

@ -0,0 +1,477 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:test_analysis.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/21
"""
import logging
import os
import sys
import pandas as pd
import torch.cuda
import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import tqdm
from pathlib import Path
from torch.nn import functional as F
from torch.utils.data import DataLoader
from load_dataset import TestApneaDataset2, read_dataset
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net021 import HYBRIDNET021
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
exam_path = Path("./output/")
# confidence threshold on the segment-level prediction
thresh = 0.5
# maximum gap (in segments) bridged between neighbouring positive predictions
thresh_event_interval = 0
# minimum run length (in segments) for a predicted event
thresh_event_length = 2
# label_type values above this count as apnea (2 = CSA, 3 = OSA/MSA)
event_thresh = 1
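# With these defaults a run of per-segment positives is never bridged across gaps
# (thresh_event_interval = 0) and only runs strictly longer than thresh_event_length = 2
# consecutive segments are kept as predicted events in segment_to_event().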
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
severity_label = {"all": "none"}
severity_df = pd.read_excel(severity_path)
for one_data in severity_df.index:
one_data = severity_df.loc[one_data]
severity_label[str(one_data["数据编号"])] = one_data["程度"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpu = torch.cuda.is_available()
num_classes = 1
calc_metrics = CALC_METRICS(num_classes)
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
read_dataset(data_path, augment=my_augment)
del hyp
# Globals describing the output folder currently being evaluated; filled in by set_environment()
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
train_set, test_set = None, None
loss_func = nn.CrossEntropyLoss()
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
logging.getLogger('matplotlib.font_manager').disabled = True
logging.getLogger('matplotlib.ticker').disabled = True
logger = logging.getLogger()
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
if (exam_path / "test.log").exists():
(exam_path / "test.log").unlink()
fh = logging.FileHandler(exam_path / "test.log", mode='a')
fh.setLevel(logging.INFO)
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
logger.info("------------------------------------")
def set_environment(i):
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
model, model_name, train_set, test_set
output_path = all_output_path[i]
logger.info(output_path)
segments_results_save_path = (output_path / "segments_results")
segments_results_save_path.mkdir(exist_ok=True)
events_results_save_path = (output_path / "events_results")
events_results_save_path.mkdir(exist_ok=True)
# 加载配置
with open(output_path / "settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
train_set = hyp["train_set"]
test_set = hyp["test_set"]
model_path = output_path / "weights" / "best.pt"
model = eval(hyp["model_name"])()
model_name = hyp["model_name"]
model.load_state_dict(torch.load(model_path))
model.cuda()
model.eval()
def test_and_analysis_and_visual(dataset_type):
if dataset_type == "test":
sampNo = train_set
elif dataset_type == "all_test":
sampNo = test_set
else:
sampNo = None
logger.info("unexpected dataset_type")
exam_name = Path("./").absolute().name
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
segment_augment=my_segment_augment)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
test_loss = 0.0
df_segment = pd.DataFrame(columns=columns)
for one in tqdm(test_loader, total=len(test_loader)):
# the dataset yields (resp, manual_feature, label, sampNo, segmentNo, label_type, new_label, SP, EP)
resp, feature, labels = one[:3]
other_info = one[3:]
resp = resp.float().cuda() if gpu else resp.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
out = model(resp)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# one[0] = list(one[0].cpu().numpy())
# one[1] = list(one[1].cpu().numpy())
# one = one[1:]
# out = out.view(1, -1).cpu().numpy().tolist()
# one += out
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
one2 = one2.transpose((1, 0))
df = pd.DataFrame(data=one2, columns=columns)
df_segment = df_segment.append(df, ignore_index=True)
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(f"EXAM_NAME: {exam_name} SampNO: {sampNo}")
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
calc_metrics.reset()
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
# Segment-level analysis
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
# Draw confusion matrices
# one per sample
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
# Bar charts (not implemented here)
# Event-level analysis
# For the inner test each segment index already corresponds to one event;
# for whole-night independence_test recordings the events have to be reconstructed separately.
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
# Drop poor-quality samples: apnea segments (label_type 2/3) whose quality label new_label == 2
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
df_select_segment = df_segment.drop(df_bad_segment.index)
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
dataset_type)
df_select_event = segment_to_event(df_select_segment, dataset_type)
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
is_event=True)
def analysis_results(df_result, base_path, dataset_type, is_event=False):
if df_result.empty:
logger.info(base_path, dataset_type, "is_empty")
return None
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
all_sampNo = df_result["sampNo"].unique()
df_metrics = pd.DataFrame(columns=columns2)
df_metrics.loc[0] = 0
df_metrics.loc[0]["sampNo"] = dataset_type
for index, sampNo in enumerate(all_sampNo):
df = df_result[df_result["sampNo"] == sampNo]
df.to_csv(
base_path / dataset_type /
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
index=False)
df_metrics.loc[index + 1] = np.NAN
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
df_metrics.loc[index + 1]["support"] = df.shape[0]
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# continue
# else:
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["TP"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["FP"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["TN"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[index + 1]["FN"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
col] != 0 else np.NAN
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
df_metrics.loc[index + 1][col]
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# return None
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
# 在inner_test中根据 分严重程度绘制
if dataset_type == "test":
all_severity = ["正常", "轻度", "中度", "重度"]
for index, severity in enumerate(all_severity):
df_event = df_metrics[df_metrics["severity"] == severity]
df_temp = pd.DataFrame(columns=columns2)
df_temp.loc[0] = 0
df_temp.loc[0]["sampNo"] = severity
df_temp.loc[0]["severity"] = str(index + 1)
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
df_temp.loc[0]["T"] += df_event["T"].sum()
df_temp.loc[0]["F"] += df_event["F"].sum()
df_temp.loc[0]["TP"] += df_event["TP"].sum()
df_temp.loc[0]["FP"] += df_event["FP"].sum()
df_temp.loc[0]["TN"] += df_event["TN"].sum()
df_temp.loc[0]["FN"] += df_event["FN"].sum()
df_temp.loc[0]["support"] += df_event["support"].sum()
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
df_metrics = df_metrics.append(df_temp, ignore_index=True)
df_backup = df_metrics
df_metrics = df_metrics.astype("str")
df_metrics = df_metrics.sort_values("severity")
df_metrics.to_csv(base_path / dataset_type /
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
index=False, encoding="gbk")
return df_backup
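

# Hedged reference (not called anywhere in the pipeline): the derived columns above follow the
# standard binary-classification definitions; shown here without the DataFrame bookkeeping.
def _metrics_from_counts_demo(TP, FP, TN, FN):
    support = TP + FP + TN + FN
    acc = (TP + TN) / support if support else 0.0
    recall = TP / (TP + FN) if (TP + FN) else 0.0   # sensitivity, TP / origin_P
    spec = TN / (TN + FP) if (TN + FP) else 0.0     # specificity, TN / origin_N
    pre = TP / (TP + FP) if (TP + FP) else 0.0      # precision,   TP / pred_P
    npv = TN / (TN + FN) if (TN + FN) else 0.0      # NPV,         TN / pred_N
    f1 = 2 * recall * pre / (recall + pre) if (recall + pre) else 0.0
    return {"acc": acc, "recall": recall, "spec": spec, "pre": pre, "NPV": npv, "F1score": f1}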
def confusionMatrix(df_analysis, base_path, dataset_type):
    if df_analysis is None:
        logger.info(f"{base_path} {dataset_type} is None")
        return
    if df_analysis.empty:
        logger.info(f"{base_path} {dataset_type} is empty")
return
classes = ["normal", "SA"]
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
for one_samp in df_analysis.index:
one_samp = df_analysis.loc[one_samp]
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
def segment_to_event(df_segment, dataset_type):
df_all_event = pd.DataFrame(columns=columns)
all_sampNo = df_segment["sampNo"].unique()
if dataset_type == "test":
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
all_segments_no = df["segmentNo"].unique()
for index_se, segment_No in enumerate(all_segments_no):
df_temp = df[df["segmentNo"] == segment_No].copy()
SP = df_temp.iloc[0]["EP"]
EP = df_temp.iloc[-1]["EP"] + 1
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
df_temp.iloc[0]["new_label"], SP, EP, 0]
thresh_Pred = df_temp["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# 扩充
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# 判断
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for i in value_ar:
if i > thresh_event_length:
df_event.iloc[index_se]["pred"] = 1
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
# encoding="gbk")
df_all_event = df_all_event.append(df_event, ignore_index=True)
else:
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
thresh_Pred = df["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# 扩充
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# 判断
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for value_index, value in enumerate(value_ar):
SP = index_ar[value_index]
EP = index_ar[value_index] + count_ar[value_index]
# TP, FP
if value > thresh_event_length:
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
label_type = df["label_type"][SP:EP].max()
new_label = df["new_label"][SP:EP].max()
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
SP, EP, thresh_Pred2[SP]]], columns=columns),
ignore_index=True)
# if value > 30:
# logger.info([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
# 长度不够
else:
df["thresh_Pred"][SP:EP] = 0
# 对负样本进行统计
# for segment_no in df["segmentNo"].unique():
# df_temp = df[df["segmentNo"] == segment_no]
# if df_temp["thresh_Pred"].sum() > 0:
# continue
#
# df_event = df_event.append(pd.DataFrame(
# [[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
# (segment_no + 1) * 30, 0]], columns=columns),
# ignore_index=True)
df_all_event = df_all_event.append(df_event, ignore_index=True)
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
return df_all_event
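

# Hedged illustration (not used by the pipeline): a minimal, self-contained sketch of the
# run-length trick used in segment_to_event above, assuming a 1-D binary prediction array.
# The original code keeps value * length so that only positive runs carry a non-zero length.
def _runs_demo(pred):
    """Return (start, length, value) for each constant run in `pred`.

    Example: [0, 1, 1, 1, 0, 1] -> [(0, 1, 0), (1, 3, 1), (4, 1, 0), (5, 1, 1)]
    """
    pred = np.asarray(pred)
    change = np.concatenate(([True], pred[:-1] != pred[1:], [True]))
    starts = np.where(change)[0]   # index of the first element of every run, plus len(pred)
    lengths = np.diff(starts)      # length of every run
    values = pred[starts[:-1]]     # value carried by every run
    return list(zip(starts[:-1].tolist(), lengths.tolist(), values.tolist()))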
# 分sampNo保存结果并不重合地可视化
# inner_test
# 分sampNo将与标签不一致的另行保存并不重合地可视化
# import shap
# explainer = shap.TreeExplainer()
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:Draw_ConfusionMatrix.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/08/10
"""
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
ax.set_ylim(len(classes) - 0.5, -0.5)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
normalize = False
fmt = '.2f' if normalize else 'd'
thresh = cm.max() * 0.8
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig_cm.tight_layout()
fig_cm.savefig(save_path)
plt.close()
#
if __name__ == '__main__':
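    # Hedged usage sketch: illustrative counts only, written to the working directory.
    demo_cm = np.array([[50, 5],
                        [8, 37]])  # rows: true [normal, SA]; cols: predicted [normal, SA]
    draw_confusionMatrix(demo_cm, classes=["normal", "SA"], title="demo",
                         save_path="demo_confusionMatrix.jpg")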
pass

View File

@ -0,0 +1,181 @@
# encoding:utf-8
"""
@ date: 2020-09-16
@ author: jingxian
@ illustration: Pre-processing
"""
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from scipy import fftpack
def Dilate(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
return returndata
def Eorde(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
return returndata
def fin_turn(data, peak):
if len(data) == 0 or len(peak) == 0: return peak
return_peak = []
for p in peak:
minx, maxx = max(0, p - 100), min(len(data), p + 100)
return_peak.append(minx + np.argmax(data[minx: maxx]))
return return_peak
class BCG_Operation():
def __init__(self, sample_rate=1000):
self.sample_rate = sample_rate
def down_sample(self, data=None, down_radio=10):
if data is None:
raise ValueError("data is None, please given an real value!")
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
self.sample_rate = self.sample_rate / down_radio
return data
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
"""
分窗
:param len_win: length of window
:return: signal windows
"""
if (len_win is None) or (data is None):
raise ValueError("length of window or data is None, please given an real value!")
else:
length = len_win * self.sample_rate # number point of a window
# step of split windows
step = length * coverage
start = 0
Splitdata = []
while (len(data) - start >= length):
Splitdata.append(data[int(start):int(start + length)])
start += step
if calculate_to_end and (len(data) - start > 2000):
remain = len(data) - start
start = start - step
step = int(remain / 2000)
start = start + step * 2000
Splitdata.append(data[int(start):int(start + length)])
return np.array(Splitdata), step
elif calculate_to_end:
return np.array(Splitdata), 0
else:
return np.array(Splitdata)
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
"""
:param type: Type of Butter. filter, lowpass, bandpass, ...
:param lowcut: Low cutoff frequency
:param highcut: High cutoff frequency
:param order: Order of filter
:return: Signal after filtering
"""
if type == "lowpass": # 低通滤波处理
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "bandpass": # 带通滤波处理
low = low_cut / (self.sample_rate * 0.5)
high = high_cut / (self.sample_rate * 0.5)
b, a = signal.butter(order, [low, high], btype='bandpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "highpass": # 高通滤波处理
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
return signal.filtfilt(b, a, np.array(data))
else: # 警告,滤波器类型必须有
raise ValueError("Please choose a type of fliter")
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
"""
:param data: Input signal
:param M: Length of structural element
:return: Signal after filter
"""
if not data.any():
raise ValueError("The input data is None, please given real value data")
g = np.ones(M)
Data_pre = np.insert(data, 0, np.zeros(M))
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
# Opening: 腐蚀 + 膨胀
out1 = Eorde(Data_pre, len(Data_pre), g, M)
out2 = Dilate(out1, len(out1), g, M)
out2 = np.insert(out2, 0, np.zeros(M - 2))
# Closing: 膨胀 + 腐蚀
out5 = Dilate(Data_pre, len(Data_pre), g, M)
out6 = Eorde(out5, len(out5), g, M)
out6 = np.insert(out6, 0, np.zeros(M - 2))
baseline = (out2 + out6) / 2
# -------------------------保留剩余价值------------------------
data_filtered = Data_pre[:len(baseline)] - baseline
data_filtered = data_filtered[M: M + len(data)]
baseline = baseline[M:]
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
baseline[-1] = baseline[-2] = baseline[-3]
if get_bre:
return data_filtered, baseline
else:
return data_filtered
def Iirnotch(self, data=None, cut_fre=50, quality=3):
"""陷波器"""
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
return signal.filtfilt(b, a, np.array(data))
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
"""
切比雪夫滤波器
:param data: Input signal
:param rp: The maximum ripple allowed
:param type: 'lowpass', 'bandpass, 'highpass'
:param low_cut: Low cut-off fre
:param high_cut: High cut-off fre
:param order: The order of filter
:return: Signal after filter
"""
if type == 'lowpass':
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'bandpass':
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'highpass':
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
else:
raise ValueError("The type of filter is None, please given the real value!")
def Envelope(self, data):
"""取信号包络"""
if len(data) <= 1: raise ValueError("Wrong input data")
hx = fftpack.hilbert(data)
        return np.sqrt(hx ** 2 + data ** 2)  # envelope = magnitude of the analytic signal
    def wavelet_trans(self, data, c_level=('aaa', 'aad'), wavelet='db4', mode='symmetric', maxlevel=10):
        wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
        new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
        for c in c_level:
            new_wp[c] = wp[c]
        return new_wp.reconstruct()
# def em_decomposition(self, data):
# from pyhht.emd import EMD
# return EMD(data).decompose()
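

# Hedged usage sketch (not part of the original module): parameters are illustrative and assume
# a raw BCG-like recording at 1000 Hz, mirroring the notch + low-pass + downsample chain
# described elsewhere in this repository.
if __name__ == '__main__':
    t = np.arange(0, 10, 1 / 1000)
    raw = 2000 + 50 * np.sin(2 * np.pi * 1.2 * t) + 5 * np.random.randn(t.size)  # toy signal
    op = BCG_Operation(sample_rate=1000)
    sig = op.Iirnotch(raw)                                      # 50 Hz notch for mains interference
    sig = op.Butterworth(sig, "lowpass", low_cut=20, order=4)   # keep the < 20 Hz band
    sig = op.down_sample(sig, down_radio=10)                    # 1000 Hz -> 100 Hz
    print(sig.shape)                                            # expected: (1000,)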

View File

@ -0,0 +1,84 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:calc_metrics.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/12
"""
import torch
import torchmetrics
class CALC_METRICS:
metrics = []
nc = 0
def __init__(self, nc):
self.nc = nc
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
self.valid_result = self.train_result = None
def update(self, pred, target):
for part1 in self.metrics:
part1.update(pred.cpu(), target.cpu())
    def compute(self):
        result = []
        for part1 in self.metrics:
            result.append(part1.compute())
        return result
def reset(self):
for part1 in self.metrics:
part1.reset()
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
temp_result = []
for j in self.metrics:
compute_result = (j.compute().cpu().numpy() * 100).tolist()
temp_result.append(compute_result)
if epoch_type == "train":
self.train_result = [loss] + temp_result
elif epoch_type == "valid":
self.valid_result = [loss] + temp_result
else:
pass
a = ""
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
return a
def wandb_log(self, wandb=None, cur_lr=None):
if wandb is None:
return
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
dict_key = []
for epoch_type in ["train", "valid"]:
dict_key.append(epoch_type + "/" + "loss")
for i in keyword:
dict_key.append(epoch_type + "/" + i)
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
log_dict["lr"] = cur_lr
wandb.log(log_dict)
if __name__ == '__main__':
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
pred = torch.tensor(pred).cuda()
true = torch.tensor(true).cuda()
calc_metrics = CALC_METRICS(1)
calc_metrics.update(pred, true)
print(calc_metrics.get_matrix())

View File

@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:train_set_visual.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/15
@description: 检查数据集是否按照预期处理
"""
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
fig = plt.figure(figsize=(10, 2), dpi=200)
plt.tight_layout()
def visual_segment(one, save_dir):
save_dir.mkdir(exist_ok=True, parents=True)
one = [two.cpu().numpy() for two in one]
    segment, feature, label_bin, PN, segmentNo, label_type, new_label, SP, EP = one  # avoid re-using the name label_type
print("drawing sample")
for i in tqdm(range(len(one[0]) if len(one[0]) < 128 else 128)):
plt.clf()
        sig = segment[i].squeeze()  # works for both (1, N) and (N, 1) shaped segments
        plt.plot(np.linspace(SP[i], EP[i], len(sig)), sig)
plt.title(f"sampNo:{PN[i]} label_type:{label_type[i]} {SP[i]}:{EP[i]}")
plt.xlabel("时间 秒")
plt.ylabel("心晓信号幅值")
plt.tight_layout()
plt.savefig(save_dir / f"sampNo {PN[i]} {SP[i]} {EP[i]} label_type {label_type[i]}")
if __name__ == '__main__':
pass

View File

@ -0,0 +1,486 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# A01
# Use a 30-second window stepped by 10 s; a window is positive when its last 10 s contain more than 5 s of apnea.
# 14.0
# Manually balance the class counts.
# 13.0
# Restrict to a subset of recordings for a first test.
# 12.0
# Cap segments overlapping unusable events; unusable segments are treated as background and no event is recorded.
# 10.0
# Use the signals with poor-quality parts removed.
# 9.0
# Add the latest quality labels (not used yet).
# 8.0
# Generate labels for every event type except hypopnea.
# A 2-second step was tried; leftovers shorter than 2 s get dropped and cause many false calls, but including them could be reconsidered.
# Use 30-second windows moved by 1 s; label all apneas as 1, hypopnea as 0, normal as 0.
# Preprocessing: 50 Hz notch filter for mains interference plus a 20 Hz low-pass filter (the 20 Hz cutoff depends on the BCG spectrum).
# First remove extreme values:
# samples above the upper baseline or below the lower baseline.
# type1 average: 1800, low: 1200, high: 2400
# type2 average: 2400, low: 1800, high: 3000
# Too many such segments shift the mean.
# TODO
# Add body-movement labels and compute the mean excluding body movement.
# Finally downsample to 100 Hz.
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
from pathos import multiprocessing
from tqdm import tqdm
# 数据集 和 标签 位置
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
# BCG 记录开始时间
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
# 读取每个数据集路径
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()
# 划分后的数据集保存路径
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)
# 设置日志
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# 输出统计数据标题栏
# logger.info("sampNo".center(8) +
# "hpy_num".center(8) + "hpy_time".center(10) +
# "csa_num".center(8) + "csa_time".center(10) +
# "osa_num".center(8) + "osa_time".center(10) +
# "msa_num".center(8) + "msa_time".center(10)
# )
logger.info("sampNo".center(8) + ',' +
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
)
base_random_seed = 42
window_second = 30
step_second = 10
view_apnea_length = 10
apnea_length_threshold = 5
valid_ratio = 0.1
test_ratio = 0.1
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
        valid_ratio > 1 and test_ratio > 1), "valid and test splits must both be ratios or both be absolute counts"
# dataset sampNo for test
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# 需要置成0的片段前面不一定补零还有可能上万
disable_segment = {
'221': [[0, 10000]],
'670': [[0, 20000]],
'683': [[0, 20000]],
'704': [[0, 26000]],
'726': [[0, 20000]],
'736': [[0, 47000]],
'933': [[0, 773560]],
'935': [[0, 26600]],
'952': [[0, 17000]],
'955': [[0, 78000]],
'961': [[0, 107000]],
'962': [[0, 15100]],
'966': [[0, 13120]],
'967': [[0, 44000]],
'1006': [[0, 60000]],
'1009': [[0, 1000]],
'1010': [[0, 49000]],
'1296': [[0, 27000]],
'1300': [[0, 33800]],
'1301': [[0, 14000]],
'1302': [[0, 5600]],
'1374': [[0, 1000]],
'1478': [[0, 998000]],
}
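

# Hedged illustration of the windowing rule described at the top of this file (not used by
# the pipeline): a 30 s window stepped by 10 s is positive when its last `view_apnea_length`
# seconds contain at least `apnea_length_threshold` seconds labelled as apnea (event_list > 1).
def _window_is_positive_demo(event_list_1s, sp, window=window_second,
                             view_len=view_apnea_length, min_len=apnea_length_threshold):
    tail = np.asarray(event_list_1s[sp + window - view_len: sp + window])
    return int(np.sum(tail > 1) >= min_len)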
# 生成数据集主函数
def generate_label(No, dataset_path):
"""
:param dataset_path: 数据集路径
:return:
"""
# 获取数据编号
sampNo = dataset_path.stem.split("samp")[0]
# 标签路径
label_path = bcg_label_path / f"export{sampNo}_all.csv"
if not label_path.exists():
raise FileNotFoundError(f"{label_path} not exist")
if not dataset_path.exists():
raise Exception(f"{dataset_path} not exists")
# 加载数据集
select_numpy = np.load(dataset_path)
# 开头不合理片段置零
if sampNo in disable_segment.keys():
for sp, ep in disable_segment[sampNo]:
select_numpy[sp:ep] = 0
# 剔除质量差信号
if sampNo == "670":
select_numpy = select_numpy[:17195 * 100]
# 获取前面补了多少0
not_zero_point = 0
for num in select_numpy:
if num > 10:
break
not_zero_point += 1
not_zero_point //= 100
# 读取标签
label_csv = pd.read_csv(label_path, encoding='gbk')
label_csv["new_label"] = label_csv["new_label"].fillna("2")
label_csv["new_start"] = label_csv["new_start"].astype("int")
label_csv["new_end"] = label_csv["new_end"].astype("int")
label_csv["Duration"] = label_csv["Duration"].astype("int")
label_csv["new_label"] = label_csv["new_label"].astype("int")
# 剔除质量不好的样本
# drop_csv = label_csv[
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
# label_csv = label_csv.drop(drop_csv.index)
# 事件片段与背景片段, 每个背景长度均为设定窗长
segment_labels = []
negative_labels = []
hpy_num = csa_num = osa_num = msa_num = 0
hpy_time = csa_time = osa_time = msa_time = 0
select_numpy_len = len(select_numpy) // 100
event_list = np.zeros(select_numpy_len)
quality_list = np.zeros(select_numpy_len)
# 遍历全部事件并统计
for i in range(len(label_csv)):
# 进行LabelEncoder
label = label_csv.iloc[i, :]
# 如果事件在补零片段,则不添加到事件列表
if label["new_end"] < not_zero_point:
continue
if sampNo == "670" and label["new_start"] > 17195:
continue
if label["new_end"] - label["new_start"] < 10:
print(label.to_numpy())
continue
# 将事件添加到事件列表
if label["Event type"] == "Hypopnea":
label_type = 1
hpy_num += 1
hpy_time += label["new_end"] - label["new_start"]
# 将低通气添加到背景 好像不用专门加入到负样本事件中?
# negative_labels.append(
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
continue
elif label["Event type"] == "Central apnea":
label_type = 2
csa_num += 1
csa_time += label["new_end"] - label["new_start"]
elif label["Event type"] == "Obstructive apnea":
label_type = 3
osa_num += 1
osa_time += label["new_end"] - label["new_start"]
# MSA 认为是OSA
elif label["Event type"] == "Mixed apnea":
label_type = 3
msa_num += 1
msa_time += label["new_end"] - label["new_start"]
else:
continue
# label_type = 0
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
print(sampNo, label.to_numpy())
# 格式为 样本编号 第几个事件 标签 开始事件 结束事件
event_list[label["new_start"]: label["new_end"]] = label_type
quality_list[label["new_start"]: label["new_end"]] = label["new_label"]
# segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
# logger.info(sampNo.center(8) +
# str(hpy_num).center(8) + str(hpy_time).center(10) +
# str(csa_num).center(8) + str(csa_time).center(10) +
# str(osa_num).center(8) + str(osa_time).center(10) +
# str(msa_num).center(8) + str(msa_time).center(10))
# 设置随机树种子
random_seed = base_random_seed + int(sampNo)
for SP in range(not_zero_point, select_numpy_len - window_second, step_second):
start = SP + window_second - view_apnea_length
end = SP + window_second
i = SP // 30
        # count the apnea-labelled seconds (event_list > 1) in the last view_apnea_length seconds of the window
        if np.sum(event_list[start:end] > 1) >= apnea_length_threshold:
segment_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second])
else:
negative_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second]
)
# 对于测试数据全部直接保存
if int(sampNo) in select_dataset:
test_label = []
# 分成指定窗长的滑窗片段
test_label = segment_labels + negative_labels
logger.info(sampNo.center(8) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
',' + str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
)
df2.loc[No] = [sampNo,
str(0), str(0), str(0),
str(0), str(0), str(0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(0), str(0), str(len(segment_labels))]
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
# np.array(test_label))
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
index=False)
train_label, valid_label, test_label = [], [], []
# 对于训练与验证集样本
if True:
# 打乱片段顺序
np.random.seed(random_seed)
np.random.shuffle(segment_labels)
np.random.shuffle(negative_labels)
# 获取训练集、验证集、测试集分到事件个数
        # number of event segments assigned to train / valid
        if 0 < valid_ratio < 1:
            train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
            valid_segment_num = int(len(segment_labels) * valid_ratio)
        else:
            train_segment_num = len(segment_labels) - valid_ratio - test_ratio
            valid_segment_num = valid_ratio

        train_label = segment_labels[:train_segment_num]
        valid_label = segment_labels[train_segment_num:train_segment_num + valid_segment_num]
        test_label = segment_labels[train_segment_num + valid_segment_num:]

        # count segments and events per split
        train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
        train_eve, valid_eve, test_eve = train_segment_num, valid_segment_num, (
                len(segment_labels) - train_segment_num - valid_segment_num)
# 数据集补偿
# if train_num < 300:
# train_num = 300 - train_num
#
# if valid_num < 300:
# valid_num = 300 - valid_num
#
# if test_num < 300:
# test_num = 300 - test_num
# 获取训练集、验证集、测试集分到背景个数
if 0 < valid_ratio < 1:
train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
valid_eve2 = int(len(negative_labels) * valid_ratio)
else:
train_eve2 = len(negative_labels) - valid_ratio - test_ratio
valid_eve2 = valid_ratio
test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
# # 直接补充到足够个数的背景事件
# train_eve2 = max(train_eve, 300)
# valid_eve2 = max(valid_eve, 40)
# test_eve2 = max(test_eve, 40)
# 强制背景数量
# train_eve2 = train_eve
# valid_eve2 = valid_eve
# test_eve2 = test_eve
# 添加背景事件数量
train_label += negative_labels[:train_eve2]
valid_label += negative_labels[train_eve2: train_eve2 + valid_eve2]
test_label += negative_labels[train_eve2 + valid_eve2:]
logger.info(sampNo.center(8) + ',' +
str(len(train_label)).center(10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(len(valid_label)).center(10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
)
df2.loc[No] = [sampNo.center(8),
str(len(train_label)),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
str(len(valid_label)),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(train_eve), str(valid_eve), str(test_eve).center(10)]
def label_check(label_list):
temp_list = []
for sampNo, index, label_type, new_label, SP, EP in label_list:
if EP - SP < window_second:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if SP < 0:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
for j in temp_list:
label_list.remove(j)
label_check(train_label)
label_check(valid_label)
label_check(test_label)
for sampNo, index, label_type, new_label, SP, EP in train_label:
if EP - SP < window_second:
print(sampNo, index, label_type, new_label, SP, EP)
if SP < 0:
print(sampNo, index, label_type, new_label, SP, EP)
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
df1 = pd.DataFrame(data=train_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
index=False)
df1 = pd.DataFrame(data=valid_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
index=False)
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
# np.array(train_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
# np.array(valid_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
# np.array(test_label))
if __name__ == '__main__':
# pool = multiprocessing.Pool(processes=44)
# pool.map(generate_label, list(all_numpy_dataset))
# pool.close()
# pool.join()
df2 = pd.DataFrame(data=None,
columns=["sampNo",
"train_num", "train_P", "train_N",
"valid_num", "valid_P", "valid_N",
"test_num", "test_P", "test_N",
"train_eve", "valid_eve", "test_eve"])
temp = []
for one_dataset in all_numpy_dataset:
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
temp.append(one_dataset)
# for one_dataset in temp:
# all_numpy_dataset.remove(one_dataset)
for No, one_dataset in enumerate(temp):
generate_label(No, one_dataset)
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
# generate_label(all_numpy_dataset[0])

155
exam/042/load_dataset.py Normal file
View File

@ -0,0 +1,155 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:load_dataset.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2021/12/03
"""
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.Preprocessing import BCG_Operation
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
"""
1. 读取方法
# 无论是否提前切分均提前转成npy格式
# 1.1 提前预处理切分好后生成npy直接载入切分好的片段 内存占用多 读取简单
使用此方法 1.2 提前预处理载入整夜数据切分好后生成csv或xls根据片段读取 内存占用少 读取较为复杂
"""
datasets = {}
# 减少重复读取
def read_dataset(data_path, augment=None):
data_path = Path(data_path)
try:
f = []
if data_path.is_dir():
dataset_list = list(data_path.rglob("*.npy"))
dataset_list.sort()
f += dataset_list
elif data_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{data_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {data_path}: {e} \n')
print("loading dataset")
for i in tqdm(f):
select_dataset = np.load(i)
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
if augment is not None:
select_dataset = augment(select_dataset)
datasets[i.name.split("samp")[0]] = select_dataset
# 用第二种方法读取
class ApneaDataset(Dataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
self.data_path = data_path
self.label_path = label_path
self.segment_augment = segment_augment
self.labels = None
self.dataset_type = dataset_type
self.select_sampNo = select_sampno
# self._getAllData()
self._getAllLabels()
def __getitem__(self, index):
# PN patience number
# SP/EP start point, end point
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
def count_SA(self):
return sum(self.labels[:, 3] > 1)
def __len__(self):
return len(self.labels)
def _getAllLabels(self):
label_path = Path(self.label_path)
if not label_path.exists():
raise Exception(f'{self.label_path} does not exist')
try:
f = []
if label_path.is_dir():
if self.dataset_type == "train":
label_list = list(label_path.rglob("*_train_label.csv"))
elif self.dataset_type == "valid":
label_list = list(label_path.rglob("*_valid_label.csv"))
elif self.dataset_type == "test":
label_list = list(label_path.glob("*_sa_test_label.csv"))
# label_list = list(label_path.rglob("*_test_label.npy"))
elif self.dataset_type == "all_test":
label_list = list(label_path.rglob("*_sa_all_label.csv"))
else:
raise ValueError("self.dataset type error")
# label_list = list(label_path.rglob("*_label.npy"))
label_list.sort()
f += label_list
elif label_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{self.label_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
print("loading labels")
for i in tqdm(f):
if int(i.name.split("_")[0]) not in self.select_sampNo:
continue
if self.labels is None:
self.labels = pd.read_csv(i).to_numpy(dtype=int)
else:
labels = pd.read_csv(i).to_numpy(dtype=int)
if len(labels) > 0:
self.labels = np.concatenate((self.labels, labels))
# self.labels = self.labels[:10000]
print(f"{self.dataset_type} length is {len(self.labels)}")
class TestApneaDataset2(ApneaDataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
super(TestApneaDataset2, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
segment_augment=segment_augment,
select_sampno=select_sampno
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
if __name__ == '__main__':
pass

298
exam/042/main.py Normal file
View File

@ -0,0 +1,298 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@time:2021/10/15
"""
import os
import yaml
import logging
from pathlib import Path
import time
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torch.cuda
from tqdm import tqdm
from torchinfo import summary
from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net021 import HYBRIDNET021
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
from utils.train_set_visual import visual_segment
# 加载配置
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
os.environ["WANDB_MODE"] = "dryrun"
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# 读取地址参数
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
save_dir.mkdir(parents=True, exist_ok=True)
# 设置日志
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
fh.setLevel(logging.NOTSET)
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
logging.getLogger('matplotlib.font_manager').disabled = True
logger.info("------------------------------------")
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
# 备份配置
with open(save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# Hyper-parameters
gpu = torch.cuda.is_available()
epochs = hyp["epoch"]
lr = hyp["lr"]
nc = hyp["nc"]
bs = hyp["batch_size"]
worker = hyp["number_worker"]
select_sampno = hyp["select_sampno"]
read_dataset(data_path, augment=my_augment)
calc_metrics = CALC_METRICS(nc)
# 开始训练
# 训练
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
model.train()
train_loss = 0.0
optimizer.zero_grad()
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
pbar.set_description(training_state)
for i, one in pbar:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
# 强行归一化数据
# segments = F.normalize(segments)
# print(segments.size())
# 减去平均值
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# 一维卷积在最后一位上卷积 所以输入CNN应为【batch_size, embedding size, sequence size】
# 所以输入为【batch_size, 1, 3000】 3000 = 30秒 * 100Hz
# segments = segments.view(len(segments), 1, -1)
out = model(resp, feature)
loss = loss_func(out, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 余弦退火传入变量
# scheduler.step(epoch + i / len(train_loader.dataset))
# 自适应调整传入变量
scheduler.step(loss)
loss_value = loss.item()
train_loss += loss_value
# cur_lr = optimizer.param_groups[-1]['lr']
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# if i % 20 == 0:
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
cur_lr = optimizer.param_groups[-1]['lr']
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
def model_valid(model, valid_loader, wdir, loss_func):
model.eval()
valid_loss = 0.0
for one in valid_loader:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, feature)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
valid_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
valid_loss /= len(valid_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
global best_f1
valid_f1 = calc_metrics.metrics[-1].compute()
if valid_f1 > best_f1:
best_f1 = valid_f1
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
torch.save(model.state_dict(), wdir / f"best.pt")
if wandb is not None:
wandb.run.summary["best_f1"] = valid_f1
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for one in test_loader:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, feature)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
import wandb
except ImportError:
wandb = None
prefix = 'wandb: '
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
if wandb is not None and wandb.run is None:
wandb_run = wandb.init(
config=hyp,
name=save_dir.stem,
project=hyp["project"],
notes=hyp["Note"],
tags=hyp["tags"],
entity=hyp["entity"],
)
exam_name = Path("./").absolute().name
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, [(32, 300, 1), (32, 3, 1)])
time.sleep(3)
if gpu:
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
train_set = [select_sampno[i] for i in train_ids]
test_set = [select_sampno[i] for i in test_ids]
logger.info(f'Train_Set:{train_set}')
logger.info(f'Independent_Test_Set:{test_set}')
sub_save_dir = save_dir / f"KFold_{fold}"
sub_save_dir.mkdir(exist_ok=True, parents=True)
wdir = sub_save_dir / "weights"
wdir.mkdir(exist_ok=True, parents=True)
hyp["train_set"] = train_set
hyp["test_set"] = test_set
with open(sub_save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# 重新初始化模型
del model_net
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
if gpu:
model_net.cuda()
logger.info(f"Weight is {[train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]}")
# 损失函数与优化器
loss_function = nn.CrossEntropyLoss(
weight=torch.Tensor([train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]).cuda())
# loss_func = nn.BCEWithLogitsLoss()
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
# momentum
# nesterov 牛顿动量
# weight_decay L2正则
optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
# optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=2836, min_lr=1e-8,
verbose=True)
# 整点图看看
for one_batch in train_loader:
visual_segment(one_batch, sub_save_dir / "sample")
break
del one_batch
# 参数记录
best_f1 = 0
for epoch in range(epochs):
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -0,0 +1,78 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net014.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/14
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# 修改激活函数
# 提高呼吸采样率
# 输入时长
WHOLE_SEGMENT_SECOND = 30
# 呼吸采样率
RESPIRATORY_FRE = 10
# BCG 时频图大小
BCG_GRAPH_SIZE = (26, 121)
class HYBRIDNET021(nn.Module):
def __init__(self, num_classes=2, init_weights=True):
super(HYBRIDNET021, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=32,
num_layers=2,
bidirectional=True,
batch_first=True)
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear(67, 8),
nn.GELU(),
nn.Linear(8, num_classes),
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') # 何教授方法
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01) # 正态分布赋值
nn.init.constant_(m.bias, 0)
def forward(self, x1, x2):
x1, (_, _) = self.lstm(x1)
# print(x1.shape)
x1 = x1[:, -1]
x1 = torch.flatten(x1, start_dim=1)
# print(x1.shape)
        x2 = x2.squeeze(-1)  # (batch, 3, 1) -> (batch, 3); keeps the batch dim even for batch size 1
x = torch.cat((x1, x2), dim=1)
x = self.classifier(x)
return x
if __name__ == '__main__':
model = HYBRIDNET021().cuda()
summary(model, [(32, 300, 1), (32, 3, 1)])
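    # Hedged shape check (assumed inputs: 30 s of 10 Hz respiration plus 3 handcrafted features):
    resp = torch.randn(4, 300, 1).cuda()
    feat = torch.randn(4, 3, 1).cuda()
    print(model(resp, feat).shape)  # expected: torch.Size([4, 2])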

80
exam/042/my_augment.py Normal file
View File

@ -0,0 +1,80 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:my_augment.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/07/26
"""
from utils.Preprocessing import BCG_Operation
import numpy as np
from scipy.signal import stft
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
def my_augment(dataset):
dataset -= dataset.mean()
dataset = preprocessing.Iirnotch(dataset)
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.5, order=4)
dataset_low = (dataset_low - dataset_low.mean()) / dataset_low.std()
# dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
dataset = {"low": dataset_low}
# "high": dataset_high}
return dataset
def get_stft(x, fs, n):
print(len(x))
f, t, amp = stft(x, fs, nperseg=n)
z = np.abs(amp.copy())
return f, t, z
def my_segment_augment(dataset, SP, EP):
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
# dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::10]
    # Feature layout options:
    #   whole-segment features                  -> 3 x 1  (used here)
    #   10-second windows                       -> 3 x 3
    #   10-second windows stepped by 2 seconds  -> 3 x 21
    # dataset_low is at 10 Hz, so one 30 s segment is 300 samples
    sub_windows_size = len(dataset_low)
    stride = 1
    manual_feature = [[], [], []]
    SP = 0
    EP = sub_windows_size
    while EP <= len(dataset_low):
# mean
manual_feature[0].append(abs(dataset_low[SP:EP]).mean())
# var
manual_feature[1].append(abs(dataset_low[SP:EP]).var())
# RMS
manual_feature[2].append(np.sqrt((dataset_low[SP:EP] ** 2).mean()))
SP += stride
EP += stride
dataset_low = dataset_low.reshape(-1, 1)
manual_feature = np.array(manual_feature)
manual_feature = manual_feature.reshape(-1, 1)
# _, _, dataset_high = stft(dataset_high, 100, nperseg=50)
# dataset_high = dataset_high.astype(np.float).T
# dataset_high = dataset_high.reshape(dataset_high.shape[0], dataset_high.shape[1])
# return dataset_low, dataset_high
return dataset_low, manual_feature
if __name__ == '__main__':
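    # Hedged sanity check with a synthetic, already-filtered "low" band signal; the shapes
    # are what the exam/042 model expects for one 30 s segment.
    fake_night = {"low": np.random.randn(60 * 100)}   # one minute of fake 100 Hz signal
    resp, feats = my_segment_augment(fake_night, SP=0, EP=30)
    print(resp.shape, feats.shape)                    # expected: (300, 1) and (3, 1)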
pass

77
exam/042/settings.yaml Normal file
View File

@ -0,0 +1,77 @@
# environment config
GPU: "0"
# dataset config
Path:
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
label: ./dataset/
save: ./output/
batch_size: 256
number_worker: 0
model_name: HYBRIDNET021
select_sampno:
- 88
- 220
- 221
- 229
- 282
- 286
- 541
- 579
- 582
- 670
- 671
- 683
- 703
- 704
- 726
- 735
- 736
- 933
- 935
- 952
- 954
- 955
- 960
- 961
- 962
- 966
- 967
- 969
- 971
- 972
- 1000
- 1004
- 1006
- 1009
- 1010
- 1296
- 1300
- 1301
- 1302
- 1308
- 1354
- 1374
- 1378
- 1478
# train hyperparameters config
epoch: 100
lr: 0.0001
nc: 1
# wandb config
entity: "marques"
project: "Sleep_Apnea_HYBRID00X"
Note: "HYBRID010 RESP "
tags: ["ReduceLROnPlateau", "RESP LSTM"]
# "CW":class_weight
# "CosineAnnealingLR"
# "ReduceLROnPlateau"

View File

@ -0,0 +1,477 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:test_analysis.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/21
"""
import logging
import os
import sys
import pandas as pd
import torch.cuda
import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import tqdm
from pathlib import Path
from torch.nn import functional as F
from torch.utils.data import DataLoader
from load_dataset import TestApneaDataset2, read_dataset
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net021 import HYBRIDNET021
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
exam_path = Path("./output/")
# 置信率阈值
thresh = 0.5
# 间隔最小距离
thresh_event_interval = 0
# 最小事件长度
thresh_event_length = 2
#
event_thresh = 1
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
severity_label = {"all": "none"}
severity_df = pd.read_excel(severity_path)
for one_data in severity_df.index:
one_data = severity_df.loc[one_data]
severity_label[str(one_data["数据编号"])] = one_data["程度"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpu = torch.cuda.is_available()
num_classes = 1
calc_metrics = CALC_METRICS(num_classes)
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
read_dataset(data_path, augment=my_augment)
del hyp
# 默认取最新的文件夹
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
train_set, test_set = None, None
loss_func = nn.CrossEntropyLoss()
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
logging.getLogger('matplotlib.font_manager').disabled = True
logging.getLogger('matplotlib.ticker').disabled = True
logger = logging.getLogger()
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
if (exam_path / "test.log").exists():
(exam_path / "test.log").unlink()
fh = logging.FileHandler(exam_path / "test.log", mode='a')
fh.setLevel(logging.INFO)
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
logger.info("------------------------------------")
def set_environment(i):
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
model, model_name, train_set, test_set
output_path = all_output_path[i]
logger.info(output_path)
segments_results_save_path = (output_path / "segments_results")
segments_results_save_path.mkdir(exist_ok=True)
events_results_save_path = (output_path / "events_results")
events_results_save_path.mkdir(exist_ok=True)
# 加载配置
with open(output_path / "settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
train_set = hyp["train_set"]
test_set = hyp["test_set"]
model_path = output_path / "weights" / "best.pt"
model = eval(hyp["model_name"])()
model_name = hyp["model_name"]
model.load_state_dict(torch.load(model_path))
model.cuda()
model.eval()
def test_and_analysis_and_visual(dataset_type):
if dataset_type == "test":
sampNo = train_set
elif dataset_type == "all_test":
sampNo = test_set
else:
sampNo = None
logger.info("出错了")
exam_name = Path("./").absolute().name
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
segment_augment=my_segment_augment)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
test_loss = 0.0
df_segment = pd.DataFrame(columns=columns)
for one in tqdm(test_loader, total=len(test_loader)):
        resp, feature, labels = one[:3]
        other_info = one[3:]
        resp = resp.float().cuda() if gpu else resp.float()
        feature = feature.float().cuda() if gpu else feature.float()
        labels = labels.cuda() if gpu else labels
        with torch.no_grad():
            out = model(resp, feature)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# one[0] = list(one[0].cpu().numpy())
# one[1] = list(one[1].cpu().numpy())
# one = one[1:]
# out = out.view(1, -1).cpu().numpy().tolist()
# one += out
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
one2 = one2.transpose((1, 0))
df = pd.DataFrame(data=one2, columns=columns)
df_segment = df_segment.append(df, ignore_index=True)
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(f"EXAM_NAME: {exam_name} SampNO: {sampNo}")
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
calc_metrics.reset()
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
# 片段级分析
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
# 绘制混淆矩阵
# 每个样本都绘制一份
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
# 绘制柱状图
# 事件级分析
# 对于inner_test 每个编号就是一个事件
# 而对于整晚的independence_test需要另行计算
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
# 剔除质量不好的样本
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
df_select_segment = df_segment.drop(df_bad_segment.index)
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
dataset_type)
df_select_event = segment_to_event(df_select_segment, dataset_type)
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
is_event=True)
def analysis_results(df_result, base_path, dataset_type, is_event=False):
if df_result.empty:
logger.info(base_path, dataset_type, "is_empty")
return None
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
all_sampNo = df_result["sampNo"].unique()
df_metrics = pd.DataFrame(columns=columns2)
df_metrics.loc[0] = 0
df_metrics.loc[0]["sampNo"] = dataset_type
for index, sampNo in enumerate(all_sampNo):
df = df_result[df_result["sampNo"] == sampNo]
df.to_csv(
base_path / dataset_type /
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
index=False)
df_metrics.loc[index + 1] = np.NAN
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
df_metrics.loc[index + 1]["support"] = df.shape[0]
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# continue
# else:
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["TP"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["FP"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["TN"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[index + 1]["FN"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
col] != 0 else np.NAN
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
df_metrics.loc[index + 1][col]
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# return None
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
# For inner_test, also aggregate the metrics by severity level
if dataset_type == "test":
all_severity = ["正常", "轻度", "中度", "重度"]
for index, severity in enumerate(all_severity):
df_event = df_metrics[df_metrics["severity"] == severity]
df_temp = pd.DataFrame(columns=columns2)
df_temp.loc[0] = 0
df_temp.loc[0]["sampNo"] = severity
df_temp.loc[0]["severity"] = str(index + 1)
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
df_temp.loc[0]["T"] += df_event["T"].sum()
df_temp.loc[0]["F"] += df_event["F"].sum()
df_temp.loc[0]["TP"] += df_event["TP"].sum()
df_temp.loc[0]["FP"] += df_event["FP"].sum()
df_temp.loc[0]["TN"] += df_event["TN"].sum()
df_temp.loc[0]["FN"] += df_event["FN"].sum()
df_temp.loc[0]["support"] += df_event["support"].sum()
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
df_metrics = df_metrics.append(df_temp, ignore_index=True)
df_backup = df_metrics
df_metrics = df_metrics.astype("str")
df_metrics = df_metrics.sort_values("severity")
df_metrics.to_csv(base_path / dataset_type /
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
index=False, encoding="gbk")
return df_backup
def confusionMatrix(df_analysis, base_path, dataset_type):
if df_analysis is None:
logger.info(base_path, dataset_type, "is None")
return
if df_analysis.empty:
logger.info(base_path, dataset_type, "is_empty")
return
classes = ["normal", "SA"]
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
for one_samp in df_analysis.index:
one_samp = df_analysis.loc[one_samp]
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
def segment_to_event(df_segment, dataset_type):
df_all_event = pd.DataFrame(columns=columns)
all_sampNo = df_segment["sampNo"].unique()
if dataset_type == "test":
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
all_segments_no = df["segmentNo"].unique()
for index_se, segment_No in enumerate(all_segments_no):
df_temp = df[df["segmentNo"] == segment_No].copy()
SP = df_temp.iloc[0]["EP"]
EP = df_temp.iloc[-1]["EP"] + 1
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
df_temp.iloc[0]["new_label"], SP, EP, 0]
thresh_Pred = df_temp["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# Dilation: extend each positive prediction forward to bridge short gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# Decision: run-length encode the dilated predictions
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
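# same_ar marks the positions where the prediction value changes, index_ar holds
# those boundary positions, count_ar the run lengths, and value_ar the run length
# for runs of 1s (0 for runs of 0s); a run of 1s longer than thresh_event_length
# marks this segment as a predicted apnea event.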
for i in value_ar:
if i > thresh_event_length:
df_event.iloc[index_se]["pred"] = 1
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
# encoding="gbk")
df_all_event = df_all_event.append(df_event, ignore_index=True)
else:
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
thresh_Pred = df["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# Dilation: extend each positive prediction forward to bridge short gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# Decision: run-length encode the dilated predictions
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
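# Same run-length trick as above: value_ar holds the length of each run of 1s.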
for value_index, value in enumerate(value_ar):
SP = index_ar[value_index]
EP = index_ar[value_index] + count_ar[value_index]
# TP, FP
if value > thresh_event_length:
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
label_type = df["label_type"][SP:EP].max()
new_label = df["new_label"][SP:EP].max()
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
SP, EP, thresh_Pred2[SP]]], columns=columns),
ignore_index=True)
# if value > 30:
# logger.info([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
# Run too short to count as an event
else:
df["thresh_Pred"][SP:EP] = 0
# Collect statistics for negative samples (disabled)
# for segment_no in df["segmentNo"].unique():
# df_temp = df[df["segmentNo"] == segment_no]
# if df_temp["thresh_Pred"].sum() > 0:
# continue
#
# df_event = df_event.append(pd.DataFrame(
# [[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
# (segment_no + 1) * 30, 0]], columns=columns),
# ignore_index=True)
df_all_event = df_all_event.append(df_event, ignore_index=True)
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
return df_all_event
# Save the results per sampNo and visualise them without overlap
# inner_test
# Per sampNo, save the segments that disagree with the labels separately and visualise them without overlap
# import shap
# explainer = shap.TreeExplainer()
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:Draw_ConfusionMatrix.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/08/10
"""
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # display minus signs correctly
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
ax.set_ylim(len(classes) - 0.5, -0.5)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
normalize = False
fmt = '.2f' if normalize else 'd'
thresh = cm.max() * 0.8
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig_cm.tight_layout()
fig_cm.savefig(save_path)
plt.close()
#
if __name__ == '__main__':
pass

View File

@ -0,0 +1,181 @@
# encoding:utf-8
"""
@ date: 2020-09-16
@ author: jingxian
@ illustration: Pre-processing
"""
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from scipy import fftpack
def Dilate(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
return returndata
def Eorde(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
return returndata
def fin_turn(data, peak):
if len(data) == 0 or len(peak) == 0: return peak
return_peak = []
for p in peak:
minx, maxx = max(0, p - 100), min(len(data), p + 100)
return_peak.append(minx + np.argmax(data[minx: maxx]))
return return_peak
class BCG_Operation():
def __init__(self, sample_rate=1000):
self.sample_rate = sample_rate
def down_sample(self, data=None, down_radio=10):
if data is None:
raise ValueError("data is None, please given an real value!")
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
self.sample_rate = self.sample_rate / down_radio
return data
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
"""
Split the signal into windows
:param len_win: length of window
:return: signal windows
"""
if (len_win is None) or (data is None):
raise ValueError("length of window or data is None, please given an real value!")
else:
length = len_win * self.sample_rate # number point of a window
# step of split windows
step = length * coverage
start = 0
Splitdata = []
while (len(data) - start >= length):
Splitdata.append(data[int(start):int(start + length)])
start += step
if calculate_to_end and (len(data) - start > 2000):
remain = len(data) - start
start = start - step
step = int(remain / 2000)
start = start + step * 2000
Splitdata.append(data[int(start):int(start + length)])
return np.array(Splitdata), step
elif calculate_to_end:
return np.array(Splitdata), 0
else:
return np.array(Splitdata)
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
"""
:param type: Type of Butterworth filter: 'lowpass', 'bandpass' or 'highpass'
:param lowcut: Low cutoff frequency
:param highcut: High cutoff frequency
:param order: Order of filter
:return: Signal after filtering
"""
if type == "lowpass": # 低通滤波处理
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "bandpass": # 带通滤波处理
low = low_cut / (self.sample_rate * 0.5)
high = high_cut / (self.sample_rate * 0.5)
b, a = signal.butter(order, [low, high], btype='bandpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "highpass": # 高通滤波处理
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
return signal.filtfilt(b, a, np.array(data))
else:  # a filter type must be given
raise ValueError("Please choose a type of filter")
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
"""
:param data: Input signal
:param M: Length of structural element
:return: Signal after filter
"""
if not data.any():
raise ValueError("The input data is None, please given real value data")
g = np.ones(M)
Data_pre = np.insert(data, 0, np.zeros(M))
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
# Opening: erosion followed by dilation
out1 = Eorde(Data_pre, len(Data_pre), g, M)
out2 = Dilate(out1, len(out1), g, M)
out2 = np.insert(out2, 0, np.zeros(M - 2))
# Closing: dilation followed by erosion
out5 = Dilate(Data_pre, len(Data_pre), g, M)
out6 = Eorde(out5, len(out5), g, M)
out6 = np.insert(out6, 0, np.zeros(M - 2))
baseline = (out2 + out6) / 2
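# The average of the opening and closing results is used as the baseline-drift estimate.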
# ------------------------- subtract the baseline and keep the residual ------------------------
data_filtered = Data_pre[:len(baseline)] - baseline
data_filtered = data_filtered[M: M + len(data)]
baseline = baseline[M:]
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
baseline[-1] = baseline[-2] = baseline[-3]
if get_bre:
return data_filtered, baseline
else:
return data_filtered
def Iirnotch(self, data=None, cut_fre=50, quality=3):
"""陷波器"""
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
return signal.filtfilt(b, a, np.array(data))
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
"""
Chebyshev type-I filter
:param data: Input signal
:param rp: The maximum ripple allowed
:param type: 'lowpass', 'bandpass, 'highpass'
:param low_cut: Low cut-off fre
:param high_cut: High cut-off fre
:param order: The order of filter
:return: Signal after filter
"""
if type == 'lowpass':
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'bandpass':
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'highpass':
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
else:
raise ValueError("The type of filter is None, please given the real value!")
def Envelope(self, data):
"""取信号包络"""
if len(data) <= 1: raise ValueError("Wrong input data")
hx = fftpack.hilbert(data)
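# hx is the Hilbert transform of data; the envelope is the magnitude of the analytic signal, sqrt(data^2 + hx^2).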
return np.sqrt(hx ** 2 + data ** 2)
def wavelet_trans(self, data,c_level=['aaa','aad'], wavelet='db4', mode='symmetric',maxlevel=10):
wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
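# Copy only the selected packet nodes (c_level) into an empty tree and reconstruct, keeping just those sub-bands.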
new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
for c in c_level :
new_wp[c] = wp[c]
return new_wp.reconstruct()
# def em_decomposition(self, data):
# from pyhht.emd import EMD
# return EMD(data).decompose()

View File

@ -0,0 +1,84 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:calc_metrics.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/12
"""
import torch
import torchmetrics
class CALC_METRICS:
metrics = []
nc = 0
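# Note: metrics is a class-level list shared by all instances; this works here
# because each script creates a single CALC_METRICS object.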
def __init__(self, nc):
self.nc = nc
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
self.valid_result = self.train_result = None
def update(self, pred, target):
for part1 in self.metrics:
part1.update(pred.cpu(), target.cpu())
def compute(self):
result = []
for part1 in self.metrics:
result.append(part1.compute())
def reset(self):
for part1 in self.metrics:
part1.reset()
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
temp_result = []
for j in self.metrics:
compute_result = (j.compute().cpu().numpy() * 100).tolist()
temp_result.append(compute_result)
if epoch_type == "train":
self.train_result = [loss] + temp_result
elif epoch_type == "valid":
self.valid_result = [loss] + temp_result
else:
pass
a = ""
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
return a
def wandb_log(self, wandb=None, cur_lr=None):
if wandb is None:
return
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
dict_key = []
for epoch_type in ["train", "valid"]:
dict_key.append(epoch_type + "/" + "loss")
for i in keyword:
dict_key.append(epoch_type + "/" + i)
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
log_dict["lr"] = cur_lr
wandb.log(log_dict)
if __name__ == '__main__':
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
pred = torch.tensor(pred).cuda()
true = torch.tensor(true).cuda()
calc_metrics = CALC_METRICS(1)
calc_metrics.update(pred, true)
print(calc_metrics.get_matrix())

View File

@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:train_set_visual.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/15
@description: check that the dataset is processed as expected
"""
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # display minus signs correctly
fig = plt.figure(figsize=(10, 2), dpi=200)
plt.tight_layout()
def visual_segment(one, save_dir):
save_dir.mkdir(exist_ok=True, parents=True)
one = [two.cpu().numpy() for two in one]
segment, stft, label_type, PN, segmentNo, label_type, new_label, SP, EP = one
print("drawing sample")
for i in tqdm(range(len(one[0]) if len(one[0]) < 128 else 128)):
plt.clf()
plt.plot(np.linspace(SP[i], EP[i], len(segment[i][0])), segment[i][0])
plt.title(f"sampNo:{PN[i]} label_type:{label_type[i]} {SP[i]}:{EP[i]}")
plt.xlabel("时间 秒")
plt.ylabel("心晓信号幅值")
plt.tight_layout()
plt.savefig(save_dir / f"sampNo {PN[i]} {SP[i]} {EP[i]} label_type {label_type[i]}")
if __name__ == '__main__':
pass

View File

@ -0,0 +1,486 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# A01
# Use 30-second windows with a 10-second step; a window is positive if more than 5 seconds of apnea fall inside its last 10 seconds
# 14.0
# Manually balance the class counts
# 13.0
# Restrict to a subset of the recordings for an initial test
# 12.0
# Cap segments overlapping unusable events; treat unusable segments as background and do not record events for them
# 10.0
# Use the signal with poor-quality parts removed
# 9.0
# Add the latest quality labels (not used)
# 8.0
# Generate labels for all events except hypopnea
# A 2-second step was tried: pieces shorter than 2 seconds get discarded, causing many false detections, but including them could be considered
# Use 30-second windows moved by 1 second; label all apneas as 1, hypopnea as 0, normal as 0
# Preprocessing: 50 Hz notch filter to remove mains interference plus a 20 Hz low-pass filter (the 20 Hz cut-off depends on the BCG spectrum)
# First remove extreme values,
# i.e. values above the upper baseline or below the lower baseline
# type1 average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000
# Too many such segments shift the mean
# TODO
# Add body-movement labels and compute the mean excluding body movement
# Finally downsample to 100 Hz
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
from pathos import multiprocessing
from tqdm import tqdm
# Dataset and label locations
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
# BCG recording start times
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
# Collect the path of every recording
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()
# Save path for the split dataset
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)
# Set up logging
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# Header row for the statistics output
# logger.info("sampNo".center(8) +
# "hpy_num".center(8) + "hpy_time".center(10) +
# "csa_num".center(8) + "csa_time".center(10) +
# "osa_num".center(8) + "osa_time".center(10) +
# "msa_num".center(8) + "msa_time".center(10)
# )
logger.info("sampNo".center(8) + ',' +
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
)
base_random_seed = 42
window_second = 30
step_second = 10
view_apnea_length = 10
apnea_length_threshold = 5
valid_ratio = 0.1
test_ratio = 0.1
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
valid_ratio > 1 and valid_ratio > 1), "验证集与测试集输入应同时为比例或数量"
# dataset sampNo for test
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# Segments that must be zeroed out; the leading part is not always zero-padded and may reach tens of thousands of samples
disable_segment = {
'221': [[0, 10000]],
'670': [[0, 20000]],
'683': [[0, 20000]],
'704': [[0, 26000]],
'726': [[0, 20000]],
'736': [[0, 47000]],
'933': [[0, 773560]],
'935': [[0, 26600]],
'952': [[0, 17000]],
'955': [[0, 78000]],
'961': [[0, 107000]],
'962': [[0, 15100]],
'966': [[0, 13120]],
'967': [[0, 44000]],
'1006': [[0, 60000]],
'1009': [[0, 1000]],
'1010': [[0, 49000]],
'1296': [[0, 27000]],
'1300': [[0, 33800]],
'1301': [[0, 14000]],
'1302': [[0, 5600]],
'1374': [[0, 1000]],
'1478': [[0, 998000]],
}
# Main function for generating the dataset
def generate_label(No, dataset_path):
"""
:param dataset_path: path of the recording (.npy)
:return:
"""
# Get the recording number
sampNo = dataset_path.stem.split("samp")[0]
# Label path
label_path = bcg_label_path / f"export{sampNo}_all.csv"
if not label_path.exists():
raise FileNotFoundError(f"{label_path} not exist")
if not dataset_path.exists():
raise Exception(f"{dataset_path} not exists")
# Load the recording
select_numpy = np.load(dataset_path)
# Zero out invalid segments at the beginning
if sampNo in disable_segment.keys():
for sp, ep in disable_segment[sampNo]:
select_numpy[sp:ep] = 0
# Discard the poor-quality tail of this recording
if sampNo == "670":
select_numpy = select_numpy[:17195 * 100]
# Count how many zeros were padded at the beginning
not_zero_point = 0
for num in select_numpy:
if num > 10:
break
not_zero_point += 1
not_zero_point //= 100
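# Convert the count from samples to seconds (the recording is sampled at 100 Hz).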
# Read the labels
label_csv = pd.read_csv(label_path, encoding='gbk')
label_csv["new_label"] = label_csv["new_label"].fillna("2")
label_csv["new_start"] = label_csv["new_start"].astype("int")
label_csv["new_end"] = label_csv["new_end"].astype("int")
label_csv["Duration"] = label_csv["Duration"].astype("int")
label_csv["new_label"] = label_csv["new_label"].astype("int")
# Remove poor-quality samples (disabled)
# drop_csv = label_csv[
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
# label_csv = label_csv.drop(drop_csv.index)
# Event segments and background segments; every background segment has the configured window length
segment_labels = []
negative_labels = []
hpy_num = csa_num = osa_num = msa_num = 0
hpy_time = csa_time = osa_time = msa_time = 0
select_numpy_len = len(select_numpy) // 100
event_list = np.zeros(select_numpy_len)
quality_list = np.zeros(select_numpy_len)
# Iterate over all events and collect statistics
for i in range(len(label_csv)):
# Encode the event type as an integer label
label = label_csv.iloc[i, :]
# Skip events that fall entirely inside the zero-padded region
if label["new_end"] < not_zero_point:
continue
if sampNo == "670" and label["new_start"] > 17195:
continue
if label["new_end"] - label["new_start"] < 10:
print(label.to_numpy())
continue
# Add the event to the event list
if label["Event type"] == "Hypopnea":
label_type = 1
hpy_num += 1
hpy_time += label["new_end"] - label["new_start"]
# Hypopnea is treated as background; it does not seem necessary to add it to the negative events explicitly
# negative_labels.append(
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
continue
elif label["Event type"] == "Central apnea":
label_type = 2
csa_num += 1
csa_time += label["new_end"] - label["new_start"]
elif label["Event type"] == "Obstructive apnea":
label_type = 3
osa_num += 1
osa_time += label["new_end"] - label["new_start"]
# MSA is treated as OSA
elif label["Event type"] == "Mixed apnea":
label_type = 3
msa_num += 1
msa_time += label["new_end"] - label["new_start"]
else:
continue
# label_type = 0
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
print(sampNo, label.to_numpy())
# Format: sample number, event index, label, start time, end time
event_list[label["new_start"]: label["new_end"]] = label_type
quality_list[label["new_start"]: label["new_end"]] = label["new_label"]
# segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
# logger.info(sampNo.center(8) +
# str(hpy_num).center(8) + str(hpy_time).center(10) +
# str(csa_num).center(8) + str(csa_time).center(10) +
# str(osa_num).center(8) + str(osa_time).center(10) +
# str(msa_num).center(8) + str(msa_time).center(10))
# Set the random seed
random_seed = base_random_seed + int(sampNo)
for SP in range(not_zero_point, select_numpy_len - window_second, step_second):
start = SP + window_second - view_apnea_length
end = SP + window_second
i = SP // 30
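# Positive-sample rule (see the header comment): a window is positive when at least
# apnea_length_threshold (5) of its last view_apnea_length (10) seconds are covered
# by an apnea event; event_list holds one event code per second (0 normal/hypopnea,
# 2 CSA, 3 OSA/MSA).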
if sum((select_numpy[start:end] > 1)) >= apnea_length_threshold:
segment_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second])
else:
negative_labels.append(
[sampNo, i, event_list[start:end].max(), quality_list[start:end].max(), SP, SP + window_second]
)
# For test recordings, save all windows directly
if int(sampNo) in select_dataset:
test_label = []
# Sliding windows of the configured length
test_label = segment_labels + negative_labels
logger.info(sampNo.center(8) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
',' + str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
)
df2.loc[No] = [sampNo,
str(0), str(0), str(0),
str(0), str(0), str(0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(0), str(0), str(len(segment_labels))]
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
# np.array(test_label))
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
index=False)
train_label, valid_label, test_label = [], [], []
# For training and validation recordings
if True:
# Shuffle the segment order
np.random.seed(random_seed)
np.random.shuffle(segment_labels)
np.random.shuffle(negative_labels)
# Number of event segments assigned to train / valid / test
if 0 < valid_ratio < 1:
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
valid_segment_num = int(len(segment_labels) * (1 - test_ratio))
else:
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
valid_segment_num = valid_ratio
train_label = segment_labels[:train_segment_num]
valid_label = segment_labels[train_segment_num:train_segment_num + valid_segment_num]
test_label = segment_labels[train_segment_num + valid_segment_num:]
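# train_segment_num and valid_segment_num are cumulative split indices into the
# shuffled event-segment list, so the three slices above are disjoint and cover it.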
# Count segments and events
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
train_eve, valid_eve, test_eve = train_segment_num, (valid_segment_num - train_segment_num), (
len(segment_labels) - valid_segment_num)
# Dataset padding (disabled)
# if train_num < 300:
# train_num = 300 - train_num
#
# if valid_num < 300:
# valid_num = 300 - valid_num
#
# if test_num < 300:
# test_num = 300 - test_num
# Number of background segments assigned to train / valid / test
if 0 < valid_ratio < 1:
train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
valid_eve2 = int(len(negative_labels) * valid_ratio)
else:
train_eve2 = len(negative_labels) - valid_ratio - test_ratio
valid_eve2 = valid_ratio
test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
# # Top up to a fixed number of background segments (disabled)
# train_eve2 = max(train_eve, 300)
# valid_eve2 = max(valid_eve, 40)
# test_eve2 = max(test_eve, 40)
# Force the background counts (disabled)
# train_eve2 = train_eve
# valid_eve2 = valid_eve
# test_eve2 = test_eve
# Append the background segments
train_label += negative_labels[:train_eve2]
valid_label += negative_labels[train_eve2: train_eve2 + valid_eve2]
test_label += negative_labels[train_eve2 + valid_eve2:]
logger.info(sampNo.center(8) + ',' +
str(len(train_label)).center(10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(len(valid_label)).center(10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
)
df2.loc[No] = [sampNo.center(8),
str(len(train_label)),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
str(sum(np.array(train_label).astype('float64')[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
str(len(valid_label)),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
str(sum(np.array(valid_label).astype('float64')[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
str(len(test_label)),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label).astype('float64')[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(train_eve), str(valid_eve), str(test_eve).center(10)]
def label_check(label_list):
temp_list = []
for sampNo, index, label_type, new_label, SP, EP in label_list:
if EP - SP < window_second:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if SP < 0:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
for j in temp_list:
label_list.remove(j)
label_check(train_label)
label_check(valid_label)
label_check(test_label)
for sampNo, index, label_type, new_label, SP, EP in train_label:
if EP - SP < window_second:
print(sampNo, index, label_type, new_label, SP, EP)
if SP < 0:
print(sampNo, index, label_type, new_label, SP, EP)
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
df1 = pd.DataFrame(data=train_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
index=False)
df1 = pd.DataFrame(data=valid_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
index=False)
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
# np.array(train_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
# np.array(valid_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
# np.array(test_label))
if __name__ == '__main__':
# pool = multiprocessing.Pool(processes=44)
# pool.map(generate_label, list(all_numpy_dataset))
# pool.close()
# pool.join()
df2 = pd.DataFrame(data=None,
columns=["sampNo",
"train_num", "train_P", "train_N",
"valid_num", "valid_P", "valid_N",
"test_num", "test_P", "test_N",
"train_eve", "valid_eve", "test_eve"])
temp = []
for one_dataset in all_numpy_dataset:
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
temp.append(one_dataset)
# for one_dataset in temp:
# all_numpy_dataset.remove(one_dataset)
for No, one_dataset in enumerate(temp):
generate_label(No, one_dataset)
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
# generate_label(all_numpy_dataset[0])

155
exam/043/load_dataset.py Normal file
View File

@ -0,0 +1,155 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:load_dataset.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2021/12/03
"""
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.Preprocessing import BCG_Operation
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
"""
1. Reading strategy
# Regardless of whether the data is pre-split, it is converted to npy in advance
# 1.1 Pre-process and pre-split, save npy, load the ready-made segments: more memory, simpler reading
Method used here: 1.2 Pre-process and load whole-night data, save the split points as csv/xls and read segments on demand: less memory, more complex reading
"""
datasets = {}
# Cache recordings to avoid repeated reads
def read_dataset(data_path, augment=None):
data_path = Path(data_path)
try:
f = []
if data_path.is_dir():
dataset_list = list(data_path.rglob("*.npy"))
dataset_list.sort()
f += dataset_list
elif data_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{data_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {data_path}: {e} \n')
print("loading dataset")
for i in tqdm(f):
select_dataset = np.load(i)
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
if augment is not None:
select_dataset = augment(select_dataset)
datasets[i.name.split("samp")[0]] = select_dataset
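# Recordings are cached in the module-level dict keyed by sample number (e.g. "220"),
# so every Dataset instance shares the same pre-filtered signals.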
# Read using the second strategy (1.2)
class ApneaDataset(Dataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
self.data_path = data_path
self.label_path = label_path
self.segment_augment = segment_augment
self.labels = None
self.dataset_type = dataset_type
self.select_sampNo = select_sampno
# self._getAllData()
self._getAllLabels()
def __getitem__(self, index):
# PN: patient number
# SP/EP start point, end point
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
def count_SA(self):
return sum(self.labels[:, 3] > 1)
def __len__(self):
return len(self.labels)
def _getAllLabels(self):
label_path = Path(self.label_path)
if not label_path.exists():
raise Exception(f'{self.label_path} does not exist')
try:
f = []
if label_path.is_dir():
if self.dataset_type == "train":
label_list = list(label_path.rglob("*_train_label.csv"))
elif self.dataset_type == "valid":
label_list = list(label_path.rglob("*_valid_label.csv"))
elif self.dataset_type == "test":
label_list = list(label_path.glob("*_sa_test_label.csv"))
# label_list = list(label_path.rglob("*_test_label.npy"))
elif self.dataset_type == "all_test":
label_list = list(label_path.rglob("*_sa_all_label.csv"))
else:
raise ValueError("self.dataset type error")
# label_list = list(label_path.rglob("*_label.npy"))
label_list.sort()
f += label_list
elif label_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{self.label_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
print("loading labels")
for i in tqdm(f):
if int(i.name.split("_")[0]) not in self.select_sampNo:
continue
if self.labels is None:
self.labels = pd.read_csv(i).to_numpy(dtype=int)
else:
labels = pd.read_csv(i).to_numpy(dtype=int)
if len(labels) > 0:
self.labels = np.concatenate((self.labels, labels))
# self.labels = self.labels[:10000]
print(f"{self.dataset_type} length is {len(self.labels)}")
class TestApneaDataset2(ApneaDataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
super(TestApneaDataset2, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
segment_augment=segment_augment,
select_sampno=select_sampno
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
if __name__ == '__main__':
pass

298
exam/043/main.py Normal file
View File

@ -0,0 +1,298 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@time:2021/10/15
"""
import os
import yaml
import logging
from pathlib import Path
import time
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torch.cuda
from tqdm import tqdm
from torchinfo import summary
from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net023 import HYBRIDNET023
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
from utils.train_set_visual import visual_segment
# Load configuration
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
os.environ["WANDB_MODE"] = "dryrun"
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# Read path parameters
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
save_dir.mkdir(parents=True, exist_ok=True)
# Set up logging
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
fh.setLevel(logging.NOTSET)
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
logging.getLogger('matplotlib.font_manager').disabled = True
logger.info("------------------------------------")
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
# Back up the configuration
with open(save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# Hyper-parameters
gpu = torch.cuda.is_available()
epochs = hyp["epoch"]
lr = hyp["lr"]
nc = hyp["nc"]
bs = hyp["batch_size"]
worker = hyp["number_worker"]
select_sampno = hyp["select_sampno"]
read_dataset(data_path, augment=my_augment)
calc_metrics = CALC_METRICS(nc)
# Training
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
model.train()
train_loss = 0.0
optimizer.zero_grad()
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
pbar.set_description(training_state)
for i, one in pbar:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
# Forced normalisation (disabled)
# segments = F.normalize(segments)
# print(segments.size())
# Subtract the mean (disabled)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# Conv1d convolves over the last dimension, so CNN input should be [batch_size, embedding_size, sequence_size],
# i.e. [batch_size, 1, 3000], where 3000 = 30 s * 100 Hz
# segments = segments.view(len(segments), 1, -1)
out = model(resp, feature)
loss = loss_func(out, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Argument for cosine annealing (disabled)
# scheduler.step(epoch + i / len(train_loader.dataset))
# Argument for ReduceLROnPlateau
scheduler.step(loss)
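# Note: ReduceLROnPlateau is stepped once per batch here, so its patience is
# counted in batches rather than epochs.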
loss_value = loss.item()
train_loss += loss_value
# cur_lr = optimizer.param_groups[-1]['lr']
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# if i % 20 == 0:
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
cur_lr = optimizer.param_groups[-1]['lr']
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
def model_valid(model, valid_loader, wdir, loss_func):
model.eval()
valid_loss = 0.0
for one in valid_loader:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, feature)
loss = loss_func(out, labels)
out = F.softmax(out, dim=1)
valid_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
valid_loss /= len(valid_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
global best_f1
valid_f1 = calc_metrics.metrics[-1].compute()
if valid_f1 > best_f1:
best_f1 = valid_f1
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
torch.save(model.state_dict(), wdir / f"best.pt")
if wandb is not None:
wandb.run.summary["best_f1"] = valid_f1
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for one in test_loader:
resp, feature, labels = one[:3]
resp = resp.float().cuda() if gpu else resp.float()
feature = feature.float().cuda() if gpu else feature.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, feature)
loss = loss_func(out, labels)
out = F.softmax(out, dim=1)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
import wandb
except ImportError:
wandb = None
prefix = 'wandb: '
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
if wandb is not None and wandb.run is None:
wandb_run = wandb.init(
config=hyp,
name=save_dir.stem,
project=hyp["project"],
notes=hyp["Note"],
tags=hyp["tags"],
entity=hyp["entity"],
)
exam_name = Path("./").absolute().name
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, [(32, 300, 1), (32, 1, 3)])
time.sleep(3)
if gpu:
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
train_set = [select_sampno[i] for i in train_ids]
test_set = [select_sampno[i] for i in test_ids]
logger.info(f'Train_Set:{train_set}')
logger.info(f'Independent_Test_Set:{test_set}')
sub_save_dir = save_dir / f"KFold_{fold}"
sub_save_dir.mkdir(exist_ok=True, parents=True)
wdir = sub_save_dir / "weights"
wdir.mkdir(exist_ok=True, parents=True)
hyp["train_set"] = train_set
hyp["test_set"] = test_set
with open(sub_save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# Re-initialise the model for this fold
del model_net
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
if gpu:
model_net.cuda()
logger.info(f"Weight is {[train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]}")
# Loss function and optimizer
loss_function = nn.CrossEntropyLoss(
weight=torch.Tensor([train_dataset.count_SA() / (len(train_dataset) - train_dataset.count_SA()), 1]).cuda())
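# The normal (negative) class is weighted by the positive/negative ratio of the
# training set so that both classes contribute roughly equally to the loss.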
# loss_func = nn.BCEWithLogitsLoss()
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
# momentum
# nesterov: Nesterov momentum
# weight_decay: L2 regularisation
optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
# optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=2836, min_lr=1e-8,
verbose=True)
# Plot a few training samples as a sanity check
for one_batch in train_loader:
visual_segment(one_batch, sub_save_dir / "sample")
break
del one_batch
# Track the best score
best_f1 = 0
for epoch in range(epochs):
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -0,0 +1,92 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net014.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/14
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Changed activation function
# Increased respiratory sampling rate
# Input duration in seconds
WHOLE_SEGMENT_SECOND = 30
# Respiratory sampling rate
RESPIRATORY_FRE = 10
# Size of the BCG spectrogram
BCG_GRAPH_SIZE = (26, 121)
class HYBRIDNET023(nn.Module):
def __init__(self, num_classes=2, init_weights=True):
super(HYBRIDNET023, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=64,
num_layers=1,
bidirectional=True,
batch_first=True)
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear(128, 8),
nn.GELU(),
nn.Linear(8, num_classes),
)
self.right = nn.Sequential(
nn.Conv1d(in_channels=1, out_channels=8, kernel_size=1, stride=1),
nn.ReLU(inplace=True),
nn.BatchNorm1d(8),
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialisation
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # initialise from a normal distribution
nn.init.constant_(m.bias, 0)
def forward(self, x1, x2):
x1, (_, _) = self.lstm(x1)
# print(x1.shape)
x1 = x1[:, -1]
x1 = torch.flatten(x1, start_dim=1)
# print(x1.shape)
# print(x2.shape)
# x2 = x2.squeeze()
# print(x2.shape)
x2 = self.right(x2)
# print(x2.shape)
# x2 = torch.flatten(x2, start_dim=1)
# x = torch.cat((x1, x2), dim=1)
# print(x[0])
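# The handcrafted-feature branch (x2) is computed but not concatenated here,
# so the classifier currently uses only the LSTM output x1.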
x = x1
x = self.classifier(x)
return x
if __name__ == '__main__':
model = HYBRIDNET023().cuda()
summary(model, [(32, 300, 1), (32, 1, 3)])

80
exam/043/my_augment.py Normal file
View File

@ -0,0 +1,80 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:my_augment.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/07/26
"""
from utils.Preprocessing import BCG_Operation
import numpy as np
from scipy.signal import stft
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
def my_augment(dataset):
dataset -= dataset.mean()
dataset = preprocessing.Iirnotch(dataset)
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.5, order=4)
dataset_low = (dataset_low - dataset_low.mean()) / dataset_low.std()
# dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
dataset = {"low": dataset_low}
# "high": dataset_high}
return dataset
def get_stft(x, fs, n):
print(len(x))
f, t, amp = stft(x, fs, nperseg=n)
z = np.abs(amp.copy())
return f, t, z
def my_segment_augment(dataset, SP, EP):
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
# dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::10]
# Features over the whole segment: 31
# Using 10-second windows: 33
# Using 10-second windows stepped by 2 seconds: 321
sub_windows_size = 30
stride = 1
manual_feature = [[], [], []]
SP = 0
EP = sub_windows_size
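# With SP = 0 and EP = sub_windows_size the loop below runs exactly once, so the
# result is a single 1 x 3 feature vector (mean, variance, RMS of |resp|), matching
# the (batch, 1, 3) feature input expected by the model in main.py.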
while EP <= sub_windows_size:
# mean
manual_feature[0].append(abs(dataset_low[SP:EP]).mean())
# var
manual_feature[1].append(abs(dataset_low[SP:EP]).var())
# RMS
manual_feature[2].append(np.sqrt((dataset_low[SP:EP] ** 2).mean()))
SP += stride
EP += stride
dataset_low = dataset_low.reshape(-1, 1)
manual_feature = np.array(manual_feature)
manual_feature = manual_feature.reshape(1, -1)
# _, _, dataset_high = stft(dataset_high, 100, nperseg=50)
# dataset_high = dataset_high.astype(np.float).T
# dataset_high = dataset_high.reshape(dataset_high.shape[0], dataset_high.shape[1])
# return dataset_low, dataset_high
return dataset_low, manual_feature
if __name__ == '__main__':
pass

77
exam/043/settings.yaml Normal file
View File

@ -0,0 +1,77 @@
# environment config
GPU: "0"
# dataset config
Path:
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
label: ./dataset/
save: ./output/
batch_size: 256
number_worker: 0
model_name: HYBRIDNET023
select_sampno:
- 88
- 220
- 221
- 229
- 282
- 286
- 541
- 579
- 582
- 670
- 671
- 683
- 703
- 704
- 726
- 735
- 736
- 933
- 935
- 952
- 954
- 955
- 960
- 961
- 962
- 966
- 967
- 969
- 971
- 972
- 1000
- 1004
- 1006
- 1009
- 1010
- 1296
- 1300
- 1301
- 1302
- 1308
- 1354
- 1374
- 1378
- 1478
# train hyperparameters config
epoch: 100
lr: 0.0001
nc: 1
# wandb config
entity: "marques"
project: "Sleep_Apnea_HYBRID00X"
Note: "HYBRID010 RESP "
tags: ["ReduceLROnPlateau", "RESP LSTM"]
# "CW":class_weight
# "CosineAnnealingLR"
# "ReduceLROnPlateau"

View File

@ -0,0 +1,477 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:test_analysis.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/21
"""
import logging
import os
import sys
import pandas as pd
import torch.cuda
import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import tqdm
from pathlib import Path
from torch.nn import functional as F
from torch.utils.data import DataLoader
from load_dataset import TestApneaDataset2, read_dataset
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net023 import HYBRIDNET023
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
exam_path = Path("./output/")
# confidence threshold for a positive segment prediction
thresh = 0.5
# minimum gap between events (positives closer than this are merged)
thresh_event_interval = 0
# minimum event length (in segments)
thresh_event_length = 2
# label_type above this value is treated as a positive (apnea) label
event_thresh = 1
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
severity_label = {"all": "none"}
severity_df = pd.read_excel(severity_path)
for one_data in severity_df.index:
one_data = severity_df.loc[one_data]
severity_label[str(one_data["数据编号"])] = one_data["程度"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpu = torch.cuda.is_available()
num_classes = 1
calc_metrics = CALC_METRICS(num_classes)
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
read_dataset(data_path, augment=my_augment)
del hyp
# by default, take the latest output folders
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
train_set, test_set = None, None
loss_func = nn.CrossEntropyLoss()
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
logging.getLogger('matplotlib.font_manager').disabled = True
logging.getLogger('matplotlib.ticker').disabled = True
logger = logging.getLogger()
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
if (exam_path / "test.log").exists():
(exam_path / "test.log").unlink()
fh = logging.FileHandler(exam_path / "test.log", mode='a')
fh.setLevel(logging.INFO)
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
logger.info("------------------------------------")
def set_environment(i):
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
model, model_name, train_set, test_set
output_path = all_output_path[i]
logger.info(output_path)
segments_results_save_path = (output_path / "segments_results")
segments_results_save_path.mkdir(exist_ok=True)
events_results_save_path = (output_path / "events_results")
events_results_save_path.mkdir(exist_ok=True)
    # load the saved run configuration
with open(output_path / "settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
train_set = hyp["train_set"]
test_set = hyp["test_set"]
model_path = output_path / "weights" / "best.pt"
model = eval(hyp["model_name"])()
model_name = hyp["model_name"]
model.load_state_dict(torch.load(model_path))
model.cuda()
model.eval()
def test_and_analysis_and_visual(dataset_type):
if dataset_type == "test":
sampNo = train_set
elif dataset_type == "all_test":
sampNo = test_set
else:
sampNo = None
        logger.info(f"unknown dataset_type: {dataset_type}")
exam_name = Path("./").absolute().name
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
segment_augment=my_segment_augment)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
test_loss = 0.0
df_segment = pd.DataFrame(columns=columns)
for one in tqdm(test_loader, total=len(test_loader)):
resp, labels = one[:2]
other_info = one[2:]
resp = resp.float().cuda() if gpu else resp.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
out = model(resp)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# one[0] = list(one[0].cpu().numpy())
# one[1] = list(one[1].cpu().numpy())
# one = one[1:]
# out = out.view(1, -1).cpu().numpy().tolist()
# one += out
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
one2 = one2.transpose((1, 0))
df = pd.DataFrame(data=one2, columns=columns)
df_segment = df_segment.append(df, ignore_index=True)
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(f"EXAM_NAME: {exam_name} SampNO: {sampNo}")
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
calc_metrics.reset()
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
    # segment-level analysis
    df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
    # draw confusion matrices, one per sample
    confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
    # draw bar charts
    # event-level analysis
    # for inner_test each segment number is one event,
    # while for the whole-night independence_test events have to be computed separately
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
    # drop segments from poor-quality samples
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
df_select_segment = df_segment.drop(df_bad_segment.index)
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
dataset_type)
df_select_event = segment_to_event(df_select_segment, dataset_type)
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
is_event=True)
def analysis_results(df_result, base_path, dataset_type, is_event=False):
if df_result.empty:
        logger.info(f"{base_path} {dataset_type} is_empty")
return None
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
all_sampNo = df_result["sampNo"].unique()
df_metrics = pd.DataFrame(columns=columns2)
df_metrics.loc[0] = 0
df_metrics.loc[0]["sampNo"] = dataset_type
for index, sampNo in enumerate(all_sampNo):
df = df_result[df_result["sampNo"] == sampNo]
df.to_csv(
base_path / dataset_type /
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
index=False)
df_metrics.loc[index + 1] = np.NAN
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
df_metrics.loc[index + 1]["support"] = df.shape[0]
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# continue
# else:
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["TP"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["FP"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["TN"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[index + 1]["FN"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
col] != 0 else np.NAN
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
df_metrics.loc[index + 1][col]
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# return None
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
    # for inner_test, also aggregate metrics by severity level
    if dataset_type == "test":
        all_severity = ["正常", "轻度", "中度", "重度"]  # normal / mild / moderate / severe, labels from the Excel sheet
for index, severity in enumerate(all_severity):
df_event = df_metrics[df_metrics["severity"] == severity]
df_temp = pd.DataFrame(columns=columns2)
df_temp.loc[0] = 0
df_temp.loc[0]["sampNo"] = severity
df_temp.loc[0]["severity"] = str(index + 1)
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
df_temp.loc[0]["T"] += df_event["T"].sum()
df_temp.loc[0]["F"] += df_event["F"].sum()
df_temp.loc[0]["TP"] += df_event["TP"].sum()
df_temp.loc[0]["FP"] += df_event["FP"].sum()
df_temp.loc[0]["TN"] += df_event["TN"].sum()
df_temp.loc[0]["FN"] += df_event["FN"].sum()
df_temp.loc[0]["support"] += df_event["support"].sum()
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
df_metrics = df_metrics.append(df_temp, ignore_index=True)
df_backup = df_metrics
df_metrics = df_metrics.astype("str")
df_metrics = df_metrics.sort_values("severity")
df_metrics.to_csv(base_path / dataset_type /
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
index=False, encoding="gbk")
return df_backup
def confusionMatrix(df_analysis, base_path, dataset_type):
    if df_analysis is None:
        logger.info(f"{base_path} {dataset_type} is None")
        return
    if df_analysis.empty:
        logger.info(f"{base_path} {dataset_type} is_empty")
        return
classes = ["normal", "SA"]
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
for one_samp in df_analysis.index:
one_samp = df_analysis.loc[one_samp]
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
def segment_to_event(df_segment, dataset_type):
df_all_event = pd.DataFrame(columns=columns)
all_sampNo = df_segment["sampNo"].unique()
if dataset_type == "test":
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
all_segments_no = df["segmentNo"].unique()
for index_se, segment_No in enumerate(all_segments_no):
df_temp = df[df["segmentNo"] == segment_No].copy()
SP = df_temp.iloc[0]["EP"]
EP = df_temp.iloc[-1]["EP"] + 1
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
df_temp.iloc[0]["new_label"], SP, EP, 0]
thresh_Pred = df_temp["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
                # extend each positive forward to bridge small gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
                # run-length encode the predictions and flag runs long enough to be events
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for i in value_ar:
if i > thresh_event_length:
df_event.iloc[index_se]["pred"] = 1
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
# encoding="gbk")
df_all_event = df_all_event.append(df_event, ignore_index=True)
else:
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
thresh_Pred = df["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
            # extend each positive forward to bridge small gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
            # run-length encode the predictions and flag runs long enough to be events
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for value_index, value in enumerate(value_ar):
SP = index_ar[value_index]
EP = index_ar[value_index] + count_ar[value_index]
# TP, FP
if value > thresh_event_length:
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
label_type = df["label_type"][SP:EP].max()
new_label = df["new_label"][SP:EP].max()
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
SP, EP, thresh_Pred2[SP]]], columns=columns),
ignore_index=True)
# if value > 30:
# logger.info([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
                # run too short to count as an event
else:
df["thresh_Pred"][SP:EP] = 0
            # tally negative samples (disabled)
# for segment_no in df["segmentNo"].unique():
# df_temp = df[df["segmentNo"] == segment_no]
# if df_temp["thresh_Pred"].sum() > 0:
# continue
#
# df_event = df_event.append(pd.DataFrame(
# [[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
# (segment_no + 1) * 30, 0]], columns=columns),
# ignore_index=True)
df_all_event = df_all_event.append(df_event, ignore_index=True)
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
return df_all_event
# save results per sampNo and visualize them without overlap
# inner_test
# per sampNo, save the segments that disagree with the labels separately and visualize them without overlap
# import shap
# explainer = shap.TreeExplainer()
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:Draw_ConfusionMatrix.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/08/10
"""
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
ax.set_ylim(len(classes) - 0.5, -0.5)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
normalize = False
fmt = '.2f' if normalize else 'd'
thresh = cm.max() * 0.8
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig_cm.tight_layout()
fig_cm.savefig(save_path)
plt.close()
#
if __name__ == '__main__':
pass
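A quick usage sketch for draw_confusionMatrix (the counts and output path are made up):

import numpy as np
from pathlib import Path
from utils.Draw_ConfusionMatrix import draw_confusionMatrix

cm = np.array([[50, 5],
               [8, 37]])    # [[TN, FP], [FN, TP]], illustrative counts
draw_confusionMatrix(cm, classes=["normal", "SA"], title="demo 1234",
                     save_path=Path("./confusion_demo.jpg"))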

View File

@ -0,0 +1,181 @@
# encoding:utf-8
"""
@ date: 2020-09-16
@ author: jingxian
@ illustration: Pre-processing
"""
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from scipy import fftpack
def Dilate(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
return returndata
def Eorde(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
return returndata
def fin_turn(data, peak):
if len(data) == 0 or len(peak) == 0: return peak
return_peak = []
for p in peak:
minx, maxx = max(0, p - 100), min(len(data), p + 100)
return_peak.append(minx + np.argmax(data[minx: maxx]))
return return_peak
class BCG_Operation():
def __init__(self, sample_rate=1000):
self.sample_rate = sample_rate
def down_sample(self, data=None, down_radio=10):
if data is None:
raise ValueError("data is None, please given an real value!")
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
self.sample_rate = self.sample_rate / down_radio
return data
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
"""
        Split the signal into windows
:param len_win: length of window
:return: signal windows
"""
if (len_win is None) or (data is None):
raise ValueError("length of window or data is None, please given an real value!")
else:
length = len_win * self.sample_rate # number point of a window
# step of split windows
step = length * coverage
start = 0
Splitdata = []
while (len(data) - start >= length):
Splitdata.append(data[int(start):int(start + length)])
start += step
if calculate_to_end and (len(data) - start > 2000):
remain = len(data) - start
start = start - step
step = int(remain / 2000)
start = start + step * 2000
Splitdata.append(data[int(start):int(start + length)])
return np.array(Splitdata), step
elif calculate_to_end:
return np.array(Splitdata), 0
else:
return np.array(Splitdata)
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
"""
:param type: Type of Butter. filter, lowpass, bandpass, ...
:param lowcut: Low cutoff frequency
:param highcut: High cutoff frequency
:param order: Order of filter
:return: Signal after filtering
"""
if type == "lowpass": # 低通滤波处理
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "bandpass": # 带通滤波处理
low = low_cut / (self.sample_rate * 0.5)
high = high_cut / (self.sample_rate * 0.5)
b, a = signal.butter(order, [low, high], btype='bandpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "highpass": # 高通滤波处理
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
return signal.filtfilt(b, a, np.array(data))
        else:  # a filter type must be given
            raise ValueError("Please choose a type of filter")
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
"""
:param data: Input signal
:param M: Length of structural element
:return: Signal after filter
"""
if not data.any():
raise ValueError("The input data is None, please given real value data")
g = np.ones(M)
Data_pre = np.insert(data, 0, np.zeros(M))
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
        # Opening: erosion + dilation
out1 = Eorde(Data_pre, len(Data_pre), g, M)
out2 = Dilate(out1, len(out1), g, M)
out2 = np.insert(out2, 0, np.zeros(M - 2))
        # Closing: dilation + erosion
out5 = Dilate(Data_pre, len(Data_pre), g, M)
out6 = Eorde(out5, len(out5), g, M)
out6 = np.insert(out6, 0, np.zeros(M - 2))
baseline = (out2 + out6) / 2
        # ------------------------- subtract the baseline, keep the useful residual ------------------------
data_filtered = Data_pre[:len(baseline)] - baseline
data_filtered = data_filtered[M: M + len(data)]
baseline = baseline[M:]
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
baseline[-1] = baseline[-2] = baseline[-3]
if get_bre:
return data_filtered, baseline
else:
return data_filtered
    def Iirnotch(self, data=None, cut_fre=50, quality=3):
        """Notch filter (e.g. to remove 50 Hz power-line interference)"""
        b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
        return signal.filtfilt(b, a, np.array(data))
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
"""
        Chebyshev type-I filter
:param data: Input signal
:param rp: The maximum ripple allowed
:param type: 'lowpass', 'bandpass, 'highpass'
:param low_cut: Low cut-off fre
:param high_cut: High cut-off fre
:param order: The order of filter
:return: Signal after filter
"""
if type == 'lowpass':
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'bandpass':
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'highpass':
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
else:
raise ValueError("The type of filter is None, please given the real value!")
    def Envelope(self, data):
        """Signal envelope via the Hilbert transform"""
        if len(data) <= 1: raise ValueError("Wrong input data")
        hx = fftpack.hilbert(data)
        return np.sqrt(hx ** 2 + data ** 2)
    def wavelet_trans(self, data, c_level=['aaa', 'aad'], wavelet='db4', mode='symmetric', maxlevel=10):
        wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
        new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
        for c in c_level:
            new_wp[c] = wp[c]
        return new_wp.reconstruct()
# def em_decomposition(self, data):
# from pyhht.emd import EMD
# return EMD(data).decompose()
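A small usage sketch for BCG_Operation on synthetic data (the band edges and window length are illustrative, not the values used elsewhere in the repo):

import numpy as np
from utils.Preprocessing import BCG_Operation

op = BCG_Operation(sample_rate=100)
sig = np.random.randn(100 * 35)      # 35 s of synthetic data at 100 Hz

# band-pass around the respiration band, then split into overlapping windows
filtered = op.Butterworth(sig, "bandpass", low_cut=0.1, high_cut=0.7, order=4)
windows = op.Splitwin(filtered, len_win=10, coverage=0.5)   # 10 s windows, 5 s hop
print(windows.shape)                 # (6, 1000)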

View File

@ -0,0 +1,84 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:calc_metrics.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/12
"""
import torch
import torchmetrics
class CALC_METRICS:
    def __init__(self, nc):
        self.nc = nc
        # instance-level list; a class-level list would be shared across instances
        self.metrics = []
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
self.valid_result = self.train_result = None
def update(self, pred, target):
for part1 in self.metrics:
part1.update(pred.cpu(), target.cpu())
def compute(self):
result = []
for part1 in self.metrics:
result.append(part1.compute())
def reset(self):
for part1 in self.metrics:
part1.reset()
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
temp_result = []
for j in self.metrics:
compute_result = (j.compute().cpu().numpy() * 100).tolist()
temp_result.append(compute_result)
if epoch_type == "train":
self.train_result = [loss] + temp_result
elif epoch_type == "valid":
self.valid_result = [loss] + temp_result
else:
pass
a = ""
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
return a
def wandb_log(self, wandb=None, cur_lr=None):
if wandb is None:
return
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
dict_key = []
for epoch_type in ["train", "valid"]:
dict_key.append(epoch_type + "/" + "loss")
for i in keyword:
dict_key.append(epoch_type + "/" + i)
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
log_dict["lr"] = cur_lr
wandb.log(log_dict)
if __name__ == '__main__':
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
pred = torch.tensor(pred).cuda()
true = torch.tensor(true).cuda()
calc_metrics = CALC_METRICS(1)
calc_metrics.update(pred, true)
print(calc_metrics.get_matrix())

View File

@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:train_set_visual.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/10/15
@description: check that the dataset is processed as expected
"""
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
fig = plt.figure(figsize=(10, 2), dpi=200)
plt.tight_layout()
def visual_segment(one, save_dir):
save_dir.mkdir(exist_ok=True, parents=True)
one = [two.cpu().numpy() for two in one]
segment, stft, label_type, PN, segmentNo, label_type, new_label, SP, EP = one
print("drawing sample")
for i in tqdm(range(len(one[0]) if len(one[0]) < 128 else 128)):
plt.clf()
plt.plot(np.linspace(SP[i], EP[i], len(segment[i])), segment[i])
plt.title(f"sampNo:{PN[i]} label_type:{label_type[i]} {SP[i]}:{EP[i]}")
plt.xlabel("时间 秒")
plt.ylabel("心晓信号幅值")
plt.tight_layout()
plt.savefig(save_dir / f"sampNo {PN[i]} {SP[i]} {EP[i]} label_type {label_type[i]}")
if __name__ == '__main__':
pass

View File

@ -207,6 +207,55 @@ TODO: 质量好的信号结果分析;严重程度的分析
300 points kept as (300, 1)
--------------------------------------------------------
--------------------------------------------------------
**Full** whole-night dataset training, 110 class_weight to address the class imbalance,
dataset preprocessing: mean subtraction,
--------------------------------------------------------
041
learning rate 1e-3, epoch 300
respiration at 10 Hz, LSTM + handcrafted features into a fully connected layer
handcrafted features over the whole window
300 points: (300, 1) + (3, 1)
--------------------------------------------------------
--------------------------------------------------------
**Full** whole-night dataset training, 110 class_weight to address the class imbalance,
dataset preprocessing: mean subtraction plus Z-score
--------------------------------------------------------
042
learning rate 1e-3, epoch 300
respiration at 10 Hz, LSTM + handcrafted features into a fully connected layer
handcrafted features over the whole window
300 points: (300, 1) + (3, 1)
--------------------------------------------------------
--------------------------------------------------------
**Full** whole-night dataset training, 110 class_weight to address the class imbalance,
dataset preprocessing: mean subtraction plus Z-score
--------------------------------------------------------
043
learning rate 1e-3, epoch 300
respiration at 10 Hz, LSTM + handcrafted features through a CNN into a fully connected layer (a rough sketch follows below)
handcrafted features over the whole window
300 points: (300, 1) + (3, 1)
--------------------------------------------------------
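A rough sketch of the 041/042-style fusion head described above: the 300×1 respiration trace goes through a bidirectional LSTM, its last hidden output is concatenated with the three whole-window handcrafted features, and a small fully connected classifier makes the decision (043 additionally passes the handcrafted features through a CNN first). This is illustrative only; the hyperparameters are assumptions and the actual HYBRIDNET02x definitions live under model/.

import torch
from torch import nn


class HybridSketch(nn.Module):
    """Illustrative only: bi-LSTM over 10 Hz respiration + handcrafted features -> FC."""

    def __init__(self, num_classes=2, n_handcraft=3):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=32, num_layers=2,
                            bidirectional=True, batch_first=True)
        self.classifier = nn.Sequential(
            nn.Linear(64 + n_handcraft, 8),
            nn.GELU(),
            nn.Linear(8, num_classes),
        )

    def forward(self, resp, feats):
        # resp: (B, 300, 1) respiration at 10 Hz, feats: (B, 1, 3) handcrafted features
        out, _ = self.lstm(resp)
        out = out[:, -1]                           # last time step, (B, 64)
        feats = feats.reshape(feats.size(0), -1)   # (B, 3)
        return self.classifier(torch.cat([out, feats], dim=1))


if __name__ == '__main__':
    x = torch.randn(4, 300, 1)
    f = torch.randn(4, 1, 3)
    print(HybridSketch()(x, f).shape)   # torch.Size([4, 2])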