Add experiments

This commit is contained in:
andrew 2022-10-06 23:08:35 +08:00
parent 8bdc1f736d
commit 8209b13e08
33 changed files with 4294 additions and 30 deletions

2
.gitignore vendored
View File

@@ -138,3 +138,5 @@ dmypy.json
# Cython debug symbols
cython_debug/
# custom
.idea/

View File

@@ -121,6 +121,8 @@ def model_train(model, train_loader, optimizer, scheduler, loss_func, training_s
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
@@ -162,6 +164,34 @@ def model_valid(model, valid_loader, wdir, loss_func):
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for resp, stft, labels in test_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
@@ -191,7 +221,7 @@ if __name__ == '__main__':
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
@@ -213,9 +243,11 @@ if __name__ == '__main__':
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# Re-initialize the model
del model_net
@@ -251,5 +283,6 @@ if __name__ == '__main__':
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@@ -446,9 +446,9 @@ def segment_to_event(df_segment, dataset_type):
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_0"))
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
# test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@@ -0,0 +1,561 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# 14.0
# Manually balance the class counts
# 13.0
# Restrict to a subset of the recordings for an initial test
# 12.0
# Clip segments with unusable events at the upper bound; mark unusable segments as background and do not record the event
# 10.0
# Use the signal with poor-quality sections removed
# 9.0
# Add the latest quality labels (not used yet)
# 8.0
# Generate labels for all events except hypopnea
# A 2 s step was tried: stretches shorter than 2 s get discarded and cause more false detections, though including them could be considered
# Use 30 s windows moved in 1 s steps; label every apnea as 1, hypopnea as 0, normal as 0
# Preprocessing: a 50 Hz notch filter to remove mains interference plus a 20 Hz low-pass filter (the 20 Hz cutoff depends on the spectral range of the BCG signal)
# First remove extreme values,
# i.e. values above the upper baseline or below the lower baseline
# type1 average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000
# Too many such segments shift the mean
# TODO
# Add body-movement labels and compute the mean excluding body movement
# Finally downsample to 100 Hz
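# Illustrative sketch only (not part of this script): the filtering described above, expressed with scipy.signal.
# The project's own implementation lives in utils.Preprocessing.BCG_Operation; fs_in stands for a hypothetical
# raw sampling rate before the final 100 Hz downsample.
#   b_notch, a_notch = scipy.signal.iirnotch(w0=50, Q=30, fs=fs_in)    # remove 50 Hz mains interference
#   x = scipy.signal.filtfilt(b_notch, a_notch, x)
#   b_lp, a_lp = scipy.signal.butter(N=4, Wn=20, btype="lowpass", fs=fs_in)
#   x = scipy.signal.filtfilt(b_lp, a_lp, x)                           # 20 Hz low-pass
#   x = x[::fs_in // 100]                                              # downsample to 100 Hz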
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
from pathos import multiprocessing
from tqdm import tqdm
# Dataset and label locations
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
# BCG recording start times
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
# Collect the path of every recording
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()
# Save path for the split dataset
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)
# Set up logging
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# Header row for the statistics output
# logger.info("sampNo".center(8) +
# "hpy_num".center(8) + "hpy_time".center(10) +
# "csa_num".center(8) + "csa_time".center(10) +
# "osa_num".center(8) + "osa_time".center(10) +
# "msa_num".center(8) + "msa_time".center(10)
# )
logger.info("sampNo".center(8) + ',' +
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
)
base_random_seed = 42
window_second = 30
step_second = 1
valid_ratio = 0.1
test_ratio = 0.1
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
valid_ratio > 1 and test_ratio > 1), "valid and test splits must both be given as ratios or both as absolute counts"
# dataset sampNo for test
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# Segments that must be zeroed out; the unusable prefix is not always zero-padded and can run to tens of thousands of samples
disable_segment = {
'221': [[0, 10000]],
'670': [[0, 20000]],
'683': [[0, 20000]],
'704': [[0, 26000]],
'726': [[0, 20000]],
'736': [[0, 47000]],
'933': [[0, 773560]],
'935': [[0, 26600]],
'952': [[0, 17000]],
'955': [[0, 78000]],
'961': [[0, 107000]],
'962': [[0, 15100]],
'966': [[0, 13120]],
'967': [[0, 44000]],
'1006': [[0, 60000]],
'1009': [[0, 1000]],
'1010': [[0, 49000]],
'1296': [[0, 27000]],
'1300': [[0, 33800]],
'1301': [[0, 14000]],
'1302': [[0, 5600]],
'1374': [[0, 1000]],
'1478': [[0, 998000]],
}
# Main routine for generating the dataset
def generate_label(No, dataset_path):
"""
:param No: row index used when writing the per-sample statistics
:param dataset_path: path to the sample's npy recording
:return:
"""
# Get the sample number
sampNo = dataset_path.stem.split("samp")[0]
# Label file path
label_path = bcg_label_path / f"export{sampNo}_all.csv"
if not label_path.exists():
raise FileNotFoundError(f"{label_path} not exist")
if not dataset_path.exists():
raise Exception(f"{dataset_path} not exists")
# Load the recording
select_numpy = np.load(dataset_path)
select_numpy_len = len(select_numpy)
# Zero out the invalid segment at the start
if sampNo in disable_segment.keys():
for sp, ep in disable_segment[sampNo]:
select_numpy[sp:ep] = 0
# Drop the poor-quality part of the signal
if sampNo == "670":
select_numpy = select_numpy[:17195 * 100]
# Count how many zeros were padded at the start
not_zero_point = 0
for num in select_numpy:
if num > 10:
break
not_zero_point += 1
not_zero_point //= 100
# Read the labels
label_csv = pd.read_csv(label_path, encoding='gbk')
label_csv["new_start"] = label_csv["new_start"].astype("int")
label_csv["new_end"] = label_csv["new_end"].astype("int")
label_csv["Duration"] = label_csv["Duration"].astype("int")
label_csv["new_label"] = label_csv["new_label"].fillna("2")
label_csv["new_label"] = label_csv["new_label"].astype("int")
# Drop poor-quality samples
# drop_csv = label_csv[
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
# label_csv = label_csv.drop(drop_csv.index)
# Event segments and background segments; each background segment spans one window length
segment_labels = []
negative_labels = []
hpy_num = csa_num = osa_num = msa_num = 0
hpy_time = csa_time = osa_time = msa_time = 0
# Iterate over all events and collect statistics
for i in range(len(label_csv)):
# Encode the label
label = label_csv.iloc[i, :]
# Skip events that fall inside the zero-padded segment
if label["new_end"] < not_zero_point:
continue
if sampNo == "670" and label["new_start"] > 17195:
continue
if label["new_end"] - label["new_start"] < 10:
continue
# Add the event to the event list
if label["Event type"] == "Hypopnea":
label_type = 1
hpy_num += 1
hpy_time += label["new_end"] - label["new_start"]
# Hypopnea goes to background; it probably does not need to be added to the negative events explicitly?
# negative_labels.append(
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
continue
elif label["Event type"] == "Central apnea":
label_type = 2
csa_num += 1
csa_time += label["new_end"] - label["new_start"]
elif label["Event type"] == "Obstructive apnea":
label_type = 3
osa_num += 1
osa_time += label["new_end"] - label["new_start"]
# Treat MSA as OSA
elif label["Event type"] == "Mixed apnea":
label_type = 3
msa_num += 1
msa_time += label["new_end"] - label["new_start"]
else:
continue
# label_type = 0
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
print(sampNo, label)
# Format: sample number, event index, label, start time, end time
segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
# logger.info(sampNo.center(8) +
# str(hpy_num).center(8) + str(hpy_time).center(10) +
# str(csa_num).center(8) + str(csa_time).center(10) +
# str(osa_num).center(8) + str(osa_time).center(10) +
# str(msa_num).center(8) + str(msa_time).center(10))
# Set the random seed
random_seed = base_random_seed + int(sampNo)
# For samples without any event, use all segments directly
if len(segment_labels) == 0:
# Skip the zero-padded prefix (move the start point to where the padding ends)
normal_SP = not_zero_point
# The start must allow at least one full window
if normal_SP < window_second:
normal_SP = window_second
# End point is the total length divided by the sampling rate
normal_EP = select_numpy_len // 100
label_type = 0
# Normal-segment index is the start second divided by 30, i.e. the epoch number
negative_labels += [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1,
SP1 + window_second] for SP1 in
range(normal_SP - window_second + step_second, normal_EP - window_second + step_second,
window_second)]
# For samples with events
# Iterate over the events and collect the background stretches between them
for index in range(len(segment_labels) + 1):
# Background lies between the end of the previous event and the start of the next one
# For the event-free stretch at the beginning, set the start point to 0
if index == 0:
normal_SP = 0
# Otherwise the start point is the end of the previous event
else:
# Add one second as a buffer
normal_SP = segment_labels[index - 1][-1] + 1
# After the last event, extend to the end of the recording
if index == len(segment_labels):
normal_EP = select_numpy_len // 100 - window_second
# Otherwise the end point is the start of the current event
else:
# Subtract one second as a buffer
normal_EP = segment_labels[index][-2] - 1
# Skip stretches that fall inside the zero-padded prefix
if normal_EP < not_zero_point:
continue
# Skip normal stretches at the start shorter than 30 s
if normal_SP < window_second:
continue
label_type = 0
# Slide a window over the stretch and append each position as a background segment
temp_1 = [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1, SP1 + window_second]
for SP1 in range(normal_SP - window_second, normal_EP - window_second, window_second)]
negative_labels += temp_1
train_label, valid_label, test_label = [], [], []
# assert (valid_ratio + test_ratio) < 1 <= len(segment_labels), f"{sampNo}: the train/valid/test counts should be smaller than the total number of events {len(segment_labels)}"
# For held-out test recordings, save every segment directly
if int(sampNo) in select_dataset:
event_label = np.zeros(select_numpy_len // 100)
quality_label = np.zeros(select_numpy_len // 100)
# Per-second event label track
for PN, segmentNo, label_type, new_label, SP, EP in segment_labels:
event_label[SP:EP] = label_type
test_label = []
# Skip the zero-padded prefix
normal_SP = not_zero_point
if normal_SP < window_second:
normal_SP = window_second
normal_EP = select_numpy_len // 100
# Split into sliding windows of the configured length
test_label += [
[sampNo, SP1 // 30, int(event_label[SP1 + window_second - step_second]),
int(quality_label[SP1 + window_second - step_second]),
SP1, SP1 + window_second] for SP1 in range(normal_SP - window_second + step_second,
normal_EP - window_second + step_second, step_second)]
logger.info(sampNo.center(8) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
',' + str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
)
df2.loc[No] = [sampNo,
str(0), str(0), str(0),
str(0), str(0), str(0),
str(len(test_label)),
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(0), str(0), str(len(segment_labels))]
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
# np.array(test_label))
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
index=False)
train_label, valid_label, test_label = [], [], []
# For the training and validation recordings
if True:
# Shuffle the segment order
np.random.seed(random_seed)
np.random.shuffle(segment_labels)
np.random.shuffle(negative_labels)
# Number of events assigned to the train / valid / test splits
if 0 < valid_ratio < 1:
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
valid_segment_num = int(len(segment_labels) * (1 - test_ratio))
else:
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
valid_segment_num = valid_ratio
# Cut each event into 30 s segments with a 1 s step
for index in range(train_segment_num):
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
train_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(SP - window_second, EP - window_second + step_second, step_second)]
for index in range(train_segment_num, valid_segment_num):
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
valid_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(SP - window_second, EP - window_second + step_second, step_second)]
for index in range(valid_segment_num, len(segment_labels)):
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
test_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(SP - window_second, EP - window_second + step_second, step_second)]
# Count segments and events
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
train_eve, valid_eve, test_eve = train_segment_num, (valid_segment_num - train_segment_num), (
len(segment_labels) - valid_segment_num)
# Dataset compensation
# if train_num < 300:
# train_num = 300 - train_num
#
# if valid_num < 300:
# valid_num = 300 - valid_num
#
# if test_num < 300:
# test_num = 300 - test_num
# Number of background segments assigned to the train / valid / test splits
# if 0 < valid_ratio < 1:
# train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
# valid_eve2 = int(len(negative_labels) * valid_ratio)
# else:
# train_eve2 = len(negative_labels) - valid_ratio - test_ratio
# valid_eve2 = valid_ratio
#
# test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
# Simply top background events up to a sufficient count
train_eve2 = max(train_eve, 300)
valid_eve2 = max(valid_eve, 40)
test_eve2 = max(test_eve, 40)
# Force the background counts
# train_eve2 = train_eve
# valid_eve2 = valid_eve
# test_eve2 = test_eve
# Append the chosen number of background events
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[:train_eve2]:
label_type = int(label_type)
train_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
in
range(normal_SP, normal_EP, step_second)]
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
train_eve2: train_eve2 + valid_eve2]:
label_type = int(label_type)
valid_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
in
range(normal_SP, normal_EP, step_second)]
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
train_eve2 + valid_eve2:train_eve2 + valid_eve2 + test_eve2]:
label_type = int(label_type)
test_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(normal_SP, normal_EP, step_second)]
logger.info(sampNo.center(8) + ',' +
str(len(train_label)).center(10) + ',' +
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(len(valid_label)).center(10) + ',' +
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
)
df2.loc[No] = [sampNo.center(8),
str(len(train_label)),
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
str(len(valid_label)),
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
str(len(test_label)),
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(train_eve), str(valid_eve), str(test_eve).center(10)]
def label_check(label_list):
temp_list = []
for sampNo, index, label_type, new_label, SP, EP in label_list:
if EP - SP < window_second:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if SP < 0:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
for j in temp_list:
label_list.remove(j)
label_check(train_label)
label_check(valid_label)
label_check(test_label)
for sampNo, index, label_type, new_label, SP, EP in train_label:
if EP - SP < window_second:
print(sampNo, index, label_type, new_label, SP, EP)
if SP < 0:
print(sampNo, index, label_type, new_label, SP, EP)
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
df1 = pd.DataFrame(data=train_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
index=False)
df1 = pd.DataFrame(data=valid_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
index=False)
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
# np.array(train_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
# np.array(valid_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
# np.array(test_label))
if __name__ == '__main__':
# pool = multiprocessing.Pool(processes=44)
# pool.map(generate_label, list(all_numpy_dataset))
# pool.close()
# pool.join()
df2 = pd.DataFrame(data=None,
columns=["sampNo",
"train_num", "train_P", "train_N",
"valid_num", "valid_P", "valid_N",
"test_num", "test_P", "test_N",
"train_eve", "valid_eve", "test_eve"])
temp = []
for one_dataset in all_numpy_dataset:
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
temp.append(one_dataset)
# for one_dataset in temp:
# all_numpy_dataset.remove(one_dataset)
for No, one_dataset in enumerate(temp):
generate_label(No, one_dataset)
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
# generate_label(all_numpy_dataset[0])
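For reference, with the defaults above (step_second=1, window_second=30) each selected recording, e.g. sampNo 282, ends up with four label CSVs under ./dataset/, plus a timestamped summary CSV and log:
282_1s_focal_30s_sa_all_label.csv
282_1s_focal_30s_sa_train_label.csv
282_1s_focal_30s_sa_valid_label.csv
282_1s_focal_30s_sa_test_label.csv
<YYYYmmddHHMM>.csv, <YYYYmmddHHMM>.log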

178
exam/004/load_dataset.py Normal file
View File

@@ -0,0 +1,178 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:load_dataset.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2021/12/03
"""
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.Preprocessing import BCG_Operation
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
"""
1. Loading strategies
# Convert everything to npy ahead of time, whether or not it is pre-segmented
# 1.1 Pre-process and pre-cut, save the segments as npy and load them directly: more memory, simple reads
Method used here: 1.2 Pre-process and load the whole-night recording, then cut segments on the fly from a csv/xls index: less memory, slightly more involved reads
"""
datasets = {}
# Avoid reading the same recording twice
def read_dataset(data_path, augment=None):
data_path = Path(data_path)
try:
f = []
if data_path.is_dir():
dataset_list = list(data_path.rglob("*.npy"))
dataset_list.sort()
f += dataset_list
elif data_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{data_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {data_path}: {e} \n')
print("loading dataset")
for i in tqdm(f):
select_dataset = np.load(i)
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
if augment is not None:
select_dataset = augment(select_dataset)
datasets[i.name.split("samp")[0]] = select_dataset
# Read using the second strategy (on-the-fly slicing)
class ApneaDataset(Dataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
self.data_path = data_path
self.label_path = label_path
self.segment_augment = segment_augment
self.labels = None
self.dataset_type = dataset_type
self.select_sampNo = select_sampno
# self._getAllData()
self._getAllLabels()
def __getitem__(self, index):
# PN patience number
# SP/EP start point, end point
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1))
else:
raise Exception(f'dataset read failure!')
def count_SA(self):
return sum(self.labels[:, 3] > 1)
def __len__(self):
return len(self.labels)
def _getAllLabels(self):
label_path = Path(self.label_path)
if not label_path.exists():
raise Exception(f'{self.label_path} does not exist')
try:
f = []
if label_path.is_dir():
if self.dataset_type == "train":
label_list = list(label_path.rglob("*_train_label.csv"))
elif self.dataset_type == "valid":
label_list = list(label_path.rglob("*_valid_label.csv"))
elif self.dataset_type == "test":
label_list = list(label_path.glob("*_sa_test_label.csv"))
# label_list = list(label_path.rglob("*_test_label.npy"))
elif self.dataset_type == "all_test":
label_list = list(label_path.rglob("*_sa_all_label.csv"))
else:
raise ValueError("self.dataset type error")
# label_list = list(label_path.rglob("*_label.npy"))
label_list.sort()
f += label_list
elif label_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{self.label_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
print("loading labels")
for i in tqdm(f):
if int(i.name.split("_")[0]) not in self.select_sampNo:
continue
if self.labels is None:
self.labels = pd.read_csv(i).to_numpy(dtype=int)
else:
labels = pd.read_csv(i).to_numpy(dtype=int)
if len(labels) > 0:
self.labels = np.concatenate((self.labels, labels))
# self.labels = self.labels[:10000]
print(f"{self.dataset_type} length is {len(self.labels)}")
class TestApneaDataset(ApneaDataset):
def __init__(self, data_path, label_path, dataset_type, select_sampno, segment_augment=None):
super(TestApneaDataset, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
select_sampno=select_sampno,
segment_augment=segment_augment
)
def __getitem__(self, index):
PN, segmentNo, label_type, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
segment = datasets[str(PN)][int(SP) * 100:int(EP) * 100].copy()
if self.segment_augment is not None:
segment = self.segment_augment(segment)
return segment, int(float(label_type) > 1), PN, segmentNo, SP, EP
else:
raise Exception(f'dataset read failure!')
class TestApneaDataset2(ApneaDataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
super(TestApneaDataset2, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
segment_augment=segment_augment,
select_sampno=select_sampno
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
if __name__ == '__main__':
pass

288
exam/004/main.py Normal file
View File

@@ -0,0 +1,288 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@time:2021/10/15
"""
import os
import yaml
import logging
from pathlib import Path
import time
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torch.cuda
from tqdm import tqdm
from torchinfo import summary
from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net001 import HYBRIDNET001
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
# Load configuration
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
os.environ["WANDB_MODE"] = "dryrun"
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# Read path settings
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
save_dir.mkdir(parents=True, exist_ok=True)
# Set up logging
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
fh.setLevel(logging.NOTSET)
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
logging.getLogger('matplotlib.font_manager').disabled = True
logger.info("------------------------------------")
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
# Back up the configuration
with open(save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# Hyper-parameters
gpu = torch.cuda.is_available()
epochs = hyp["epoch"]
lr = hyp["lr"]
nc = hyp["nc"]
bs = hyp["batch_size"]
worker = hyp["number_worker"]
select_sampno = hyp["select_sampno"]
read_dataset(data_path, augment=my_augment)
calc_metrics = CALC_METRICS(nc)
# Training
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
model.train()
train_loss = 0.0
optimizer.zero_grad()
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
pbar.set_description(training_state)
for i, (resp, stft, labels) in pbar:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
# Forcibly normalize the data
# segments = F.normalize(segments)
# print(segments.size())
# Subtract the mean
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# Conv1d convolves over the last dimension, so the CNN input should be [batch_size, embedding size, sequence size]
# i.e. the input would be [batch_size, 1, 3000], where 3000 = 30 s * 100 Hz
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
loss = loss_func(out, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Argument passed when using cosine annealing
# scheduler.step(epoch + i / len(train_loader.dataset))
# Argument passed when using ReduceLROnPlateau
scheduler.step(loss)
loss_value = loss.item()
train_loss += loss_value
# cur_lr = optimizer.param_groups[-1]['lr']
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# if i % 20 == 0:
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
cur_lr = optimizer.param_groups[-1]['lr']
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
def model_valid(model, valid_loader, wdir, loss_func):
model.eval()
valid_loss = 0.0
for resp, stft, labels in valid_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
valid_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
valid_loss /= len(valid_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
global best_f1
valid_f1 = calc_metrics.metrics[-1].compute()
if valid_f1 > best_f1:
best_f1 = valid_f1
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
torch.save(model.state_dict(), wdir / f"best.pt")
if wandb is not None:
wandb.run.summary["best_f1"] = valid_f1
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for resp, stft, labels in test_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
import wandb
except ImportError:
wandb = None
prefix = 'wandb: '
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
if wandb is not None and wandb.run is None:
wandb_run = wandb.init(
config=hyp,
name=save_dir.stem,
project=hyp["project"],
notes=hyp["Note"],
tags=hyp["tags"],
entity=hyp["entity"],
)
exam_name = Path("./").absolute().name
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, [(1, 120, 1), (1, 1, 121, 26)])
time.sleep(3)
if gpu:
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
train_set = [select_sampno[i] for i in train_ids]
test_set = [select_sampno[i] for i in test_ids]
logger.info(f'Train_Set:{train_set}')
logger.info(f'Independent_Test_Set:{test_set}')
sub_save_dir = save_dir / f"KFold_{fold}"
sub_save_dir.mkdir(exist_ok=True, parents=True)
wdir = sub_save_dir / "weights"
wdir.mkdir(exist_ok=True, parents=True)
hyp["train_set"] = train_set
hyp["test_set"] = test_set
with open(sub_save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# Re-initialize the model
del model_net
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
if gpu:
model_net.cuda()
logger.info(f"Weight is {[1, (len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]}")
# Loss function and optimizer
loss_function = nn.CrossEntropyLoss(
weight=torch.Tensor([1, (len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]).cuda())
# loss_func = nn.BCEWithLogitsLoss()
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
# momentum
# nesterov: Nesterov momentum
# weight_decay: L2 regularization
# optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=2836, min_lr=1e-8,
verbose=True)
# Metric bookkeeping
best_f1 = 0
for epoch in range(epochs):
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@@ -0,0 +1,97 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net001.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/30
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Input duration in seconds
WHOLE_SEGMENT_SECOND = 30
# Respiration sampling rate (Hz)
RESPIRATORY_FRE = 4
# BCG spectrogram size
BCG_GRAPH_SIZE = (26, 121)
class HYBRIDNET001(nn.Module):
def __init__(self, num_classes=2, init_weights=True):
super(HYBRIDNET001, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=16,
num_layers=1,
bidirectional=True,
batch_first=True)
self.right = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3),
stride=(1, 1), padding=(1, 1)),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
nn.BatchNorm2d(16),
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3),
stride=(1, 1), padding=(1, 1)),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
nn.BatchNorm2d(32),
nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3),
stride=(1, 1), padding=(1, 1)),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
nn.BatchNorm2d(32)
)
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear((120 * 32 + 32 * 16 * 4), 512),
nn.ReLU(inplace=True),
nn.Linear(512, num_classes),
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialization
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # initialize from a normal distribution
nn.init.constant_(m.bias, 0)
def forward(self, x1, x2):
x1, (_, _) = self.lstm(x1)
x2 = self.right(x2)
# print(x1.shape)
# print(x2.shape)
x1 = torch.flatten(x1, start_dim=1)
x2 = torch.flatten(x2, start_dim=1)
x = torch.cat((x1, x2), dim=1)
x = self.classifier(x)
return x
if __name__ == '__main__':
model = HYBRIDNET001(2).cuda()
summary(model, [(32, 120, 1), (32, 1, 121, 26)])
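The two inputs match what my_segment_augment produces: a 30 s respiration trace at 4 Hz (120 steps, 1 feature, fed to the bidirectional LSTM) and a 1x121x26 STFT map of the high-frequency BCG band (fed to the CNN branch). A minimal forward-pass sketch on random tensors, illustrative only:
import torch
from model.Hybrid_Net001 import HYBRIDNET001
model = HYBRIDNET001(num_classes=2)
resp = torch.randn(8, 120, 1)       # batch of 30 s respiration traces at 4 Hz
spec = torch.randn(8, 1, 121, 26)   # batch of BCG STFT maps
logits = model(resp, spec)          # shape (8, 2); softmax is applied later in main.py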

51
exam/004/my_augment.py Normal file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:my_augment.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/07/26
"""
from utils.Preprocessing import BCG_Operation
import numpy as np
from scipy.signal import stft
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
def my_augment(dataset):
dataset = dataset - dataset.mean()
dataset = preprocessing.Iirnotch(dataset)
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.7, order=6)
dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
dataset = {"low": dataset_low,
"high": dataset_high}
return dataset
def get_stft(x, fs, n):
print(len(x))
f, t, amp = stft(x, fs, nperseg=n)
z = np.abs(amp.copy())
return f, t, z
def my_segment_augment(dataset, SP, EP):
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::25]
dataset_low = dataset_low.reshape(dataset_low.shape[0], 1)
_, _, dataset_high = stft(dataset_high, 100, nperseg=50)
dataset_high = dataset_high.astype(np.float).T
dataset_high = dataset_high.reshape(1, dataset_high.shape[0], dataset_high.shape[1])
return dataset_low, dataset_high
if __name__ == '__main__':
pass
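A quick shape check of the two outputs, illustrative only (synthetic data standing in for a real recording, with my_augment and my_segment_augment from this file in scope):
import numpy as np
raw = np.random.randn(120 * 100)          # two synthetic minutes at 100 Hz
d = my_augment(raw)                       # {"low": respiration band, "high": BCG band}
resp, spec = my_segment_augment(d, SP=30, EP=60)
print(resp.shape, spec.shape)             # (120, 1) (1, 121, 26)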

42
exam/004/settings.yaml Normal file
View File

@@ -0,0 +1,42 @@
# environment config
GPU: "1"
# dataset config
Path:
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
label: ./dataset/
save: ./output/
batch_size: 256
number_worker: 0
model_name: HYBRIDNET001
select_sampno:
- 282
- 726
- 229
- 582
- 286
- 966
- 1000
- 1004
- 1006
- 1009
# train hyperparameters config
epoch: 50
lr: 0.00001
nc: 1
# wandb config
entity: "marques"
project: "Sleep_Apnea_HYBRID00X"
Note: "HYBRID001 NOBD STFT RESP DUAL"
tags: ["ReduceLROnPlateau", "RESP LSTM", "STFT CNN"]
# "CW":class_weight
# "CosineAnnealingLR"
# "ReduceLROnPlateau"

View File

@ -0,0 +1,454 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:test_analysis.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/21
"""
import os
import sys
import pandas as pd
import torch.cuda
import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import tqdm
from pathlib import Path
from torch.nn import functional as F
from torch.utils.data import DataLoader
from load_dataset import TestApneaDataset2, read_dataset
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net001 import HYBRIDNET001
plt.rcParams['font.sans-serif'] = ['SimHei']  # needed so Chinese labels render correctly
exam_path = Path("./output/")
# Confidence threshold
thresh = 0.5
# Minimum gap between events (s)
thresh_event_interval = 2
# Minimum event length (s)
thresh_event_length = 8
#
event_thresh = 1
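# Note: segment_to_event itself is defined later in this file and is not shown in this excerpt.
# A rough, assumption-labeled reading of how the thresholds above are typically applied when
# merging per-second predictions into events: keep seconds with pred > thresh, merge runs of
# positives separated by less than thresh_event_interval seconds, then discard merged runs
# shorter than thresh_event_length seconds.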
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
severity_label = {"all": "none"}
severity_df = pd.read_excel(severity_path)
for one_data in severity_df.index:
one_data = severity_df.loc[one_data]
severity_label[str(one_data["数据编号"])] = one_data["程度"]  # columns: sample number, severity
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
gpu = torch.cuda.is_available()
num_classes = 1
calc_metrics = CALC_METRICS(num_classes)
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
read_dataset(data_path, augment=my_augment)
del hyp
# By default, use the most recent output folder
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
train_set, test_set = None, None
loss_func = nn.CrossEntropyLoss()
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
def set_environment(i):
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
model, model_name, train_set, test_set
output_path = all_output_path[i]
print(output_path)
segments_results_save_path = (output_path / "segments_results")
segments_results_save_path.mkdir(exist_ok=True)
events_results_save_path = (output_path / "events_results")
events_results_save_path.mkdir(exist_ok=True)
# Load configuration
with open(output_path / "settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
train_set = hyp["train_set"]
test_set = hyp["test_set"]
model_path = output_path / "weights" / "best.pt"
model = eval(hyp["model_name"])()
model_name = hyp["model_name"]
model.load_state_dict(torch.load(model_path))
model.cuda()
model.eval()
def test_and_analysis_and_visual(dataset_type):
if dataset_type == "test":
sampNo = train_set
elif dataset_type == "all_test":
sampNo = test_set
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
segment_augment=my_segment_augment)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
test_loss = 0.0
df_segment = pd.DataFrame(columns=columns)
for one in tqdm(test_loader, total=len(test_loader)):
resp, stft, labels = one[:3]
other_info = one[3:]
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
out = model(resp, stft)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# one[0] = list(one[0].cpu().numpy())
# one[1] = list(one[1].cpu().numpy())
# one = one[1:]
# out = out.view(1, -1).cpu().numpy().tolist()
# one += out
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
one2 = one2.transpose((1, 0))
df = pd.DataFrame(data=one2, columns=columns)
df_segment = df_segment.append(df, ignore_index=True)
test_loss /= len(test_loader)
calc_metrics.compute()
print(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
calc_metrics.reset()
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
# Segment-level analysis
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
# Plot the confusion matrix
# One per sample
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
# Plot bar charts
# Event-level analysis
# For inner_test, every index corresponds to one event
# For the whole-night independence_test, events have to be reconstructed separately
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
# Drop poor-quality samples
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
df_select_segment = df_segment.drop(df_bad_segment.index)
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
dataset_type)
df_select_event = segment_to_event(df_select_segment, dataset_type)
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
is_event=True)
def analysis_results(df_result, base_path, dataset_type, is_event=False):
if df_result.empty:
print(base_path, dataset_type, "is_empty")
return None
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
all_sampNo = df_result["sampNo"].unique()
df_metrics = pd.DataFrame(columns=columns2)
df_metrics.loc[0] = 0
df_metrics.loc[0]["sampNo"] = dataset_type
for index, sampNo in enumerate(all_sampNo):
df = df_result[df_result["sampNo"] == sampNo]
df.to_csv(
base_path / dataset_type /
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
index=False)
df_metrics.loc[index + 1] = np.NAN
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
df_metrics.loc[index + 1]["support"] = df.shape[0]
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# continue
# else:
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["TP"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["FP"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["TN"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[index + 1]["FN"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
col] != 0 else np.NAN
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
df_metrics.loc[index + 1][col]
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# return None
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
# For the inner test set, also aggregate the metrics by severity level
if dataset_type == "test":
all_severity = ["正常", "轻度", "中度", "重度"]  # normal, mild, moderate, severe (values as stored in the severity sheet)
for index, severity in enumerate(all_severity):
df_event = df_metrics[df_metrics["severity"] == severity]
df_temp = pd.DataFrame(columns=columns2)
df_temp.loc[0] = 0
df_temp.loc[0]["sampNo"] = severity
df_temp.loc[0]["severity"] = str(index + 1)
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
df_temp.loc[0]["T"] += df_event["T"].sum()
df_temp.loc[0]["F"] += df_event["F"].sum()
df_temp.loc[0]["TP"] += df_event["TP"].sum()
df_temp.loc[0]["FP"] += df_event["FP"].sum()
df_temp.loc[0]["TN"] += df_event["TN"].sum()
df_temp.loc[0]["FN"] += df_event["FN"].sum()
df_temp.loc[0]["support"] += df_event["support"].sum()
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
df_metrics = df_metrics.append(df_temp, ignore_index=True)
df_backup = df_metrics
df_metrics = df_metrics.astype("str")
df_metrics = df_metrics.sort_values("severity")
df_metrics.to_csv(base_path / dataset_type /
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
index=False, encoding="gbk")
return df_backup
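# The per-sample metrics above are the usual confusion-matrix quantities. A small worked
# example with hypothetical counts (origin_P=20, origin_N=80, TP=15, FN=5, TN=70, FP=10):
#   pred_P = TP + FP = 25,  pred_N = TN + FN = 75,  T = TP + TN = 85,  support = 100
#   acc    = T / support      = 0.85
#   recall = TP / origin_P    = 0.75   (sensitivity)
#   spec   = TN / origin_N    = 0.875
#   pre    = TP / pred_P      = 0.60   (PPV)
#   NPV    = TN / pred_N      ≈ 0.933
#   F1     = 2 * recall * pre / (recall + pre) ≈ 0.667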
def confusionMatrix(df_analysis, base_path, dataset_type):
if df_analysis is None:
print(base_path, dataset_type, "is None")
return
if df_analysis.empty:
print(base_path, dataset_type, "is_empty")
return
classes = ["normal", "SA"]
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
for one_samp in df_analysis.index:
one_samp = df_analysis.loc[one_samp]
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
def segment_to_event(df_segment, dataset_type):
df_all_event = pd.DataFrame(columns=columns)
all_sampNo = df_segment["sampNo"].unique()
if dataset_type == "test":
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
all_segments_no = df["segmentNo"].unique()
for index_se, segment_No in enumerate(all_segments_no):
df_temp = df[df["segmentNo"] == segment_No].copy()
SP = df_temp.iloc[0]["EP"]
EP = df_temp.iloc[-1]["EP"] + 1
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
df_temp.iloc[0]["new_label"], SP, EP, 0]
thresh_Pred = df_temp["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# Expansion: extend every positive prediction forward by up to (thresh_event_interval - 1) s to bridge short gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# Run-length grouping: locate runs of identical values in the expanded prediction
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for i in value_ar:
if i > thresh_event_length:
df_event.iloc[index_se]["pred"] = 1
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
# encoding="gbk")
df_all_event = df_all_event.append(df_event, ignore_index=True)
else:
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
thresh_Pred = df["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# Expansion: extend every positive prediction forward by up to (thresh_event_interval - 1) s to bridge short gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# Run-length grouping: locate runs of identical values in the expanded prediction
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for value_index, value in enumerate(value_ar):
SP = index_ar[value_index]
EP = index_ar[value_index] + count_ar[value_index]
# TP, FP
if value > thresh_event_length:
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
label_type = df["label_type"][SP:EP].max()
new_label = df["new_label"][SP:EP].max()
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
SP, EP, thresh_Pred2[SP]]], columns=columns),
ignore_index=True)
if value > 30:
print([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
# Run too short: suppress the prediction
else:
df["thresh_Pred"][SP:EP] = 0
# Collect the negative (event-free) segments as well
for segment_no in df["segmentNo"].unique():
df_temp = df[df["segmentNo"] == segment_no]
if df_temp["thresh_Pred"].sum() > 0:
continue
df_event = df_event.append(pd.DataFrame(
[[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
(segment_no + 1) * 30, 0]], columns=columns),
ignore_index=True)
df_all_event = df_all_event.append(df_event, ignore_index=True)
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
return df_all_event
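# The run-length grouping above can be read off a toy, hypothetical prediction vector:
#   thresh_Pred2 = [0, 0, 1, 1, 1, 0, 1, 1]
#   same_ar  = [T, F, T, F, F, T, T, F, T]   # True wherever a new run starts (plus both ends)
#   index_ar = [0, 2, 5, 6, 8]               # boundaries of the constant runs
#   count_ar = [2, 3, 1, 2]                  # lengths of the runs
#   value_ar = [0, 3, 0, 2]                  # run length if the run consists of 1s, else 0
# A predicted run is kept as an event only when its value exceeds thresh_event_length.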
# Save results per sampNo and visualize them without overlap
# inner_test
# Per sampNo, save the segments that disagree with the label separately and visualize them without overlap
# import shap
# explainer = shap.TreeExplainer()
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:Draw_ConfusionMatrix.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/08/10
"""
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK (Chinese) labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
ax.set_ylim(len(classes) - 0.5, -0.5)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
normalize = False
fmt = '.2f' if normalize else 'd'
thresh = cm.max() * 0.8
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig_cm.tight_layout()
fig_cm.savefig(save_path)
plt.close()
#
if __name__ == '__main__':
pass
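# A minimal usage sketch (hypothetical counts; assumes the SimHei font is installed):
# cm = np.array([[70, 10],
#                [5, 15]])   # rows = true [normal, SA], columns = predicted [normal, SA]
# draw_confusionMatrix(cm, classes=["normal", "SA"], title="severity sampNo (demo)",
#                      save_path="demo_confusionMatrix.jpg")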

View File

@ -0,0 +1,181 @@
# encoding:utf-8
"""
@ date: 2020-09-16
@ author: jingxian
@ illustration: Pre-processing
"""
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from scipy import fftpack
def Dilate(x, N, g, M):
# Sliding-window minimum of (x - g) over windows of length M
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
return returndata
def Eorde(x, N, g, M):
# Sliding-window maximum of (x - g) over windows of length M
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
return returndata
def fin_turn(data, peak):
# Snap each detected peak to the local maximum within +-100 samples
if len(data) == 0 or len(peak) == 0: return peak
return_peak = []
for p in peak:
minx, maxx = max(0, p - 100), min(len(data), p + 100)
return_peak.append(minx + np.argmax(data[minx: maxx]))
return return_peak
class BCG_Operation():
def __init__(self, sample_rate=1000):
self.sample_rate = sample_rate
def down_sample(self, data=None, down_radio=10):
if data is None:
raise ValueError("data is None, please given an real value!")
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
self.sample_rate = self.sample_rate / down_radio
return data
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
"""
分窗
:param len_win: length of window
:return: signal windows
"""
if (len_win is None) or (data is None):
raise ValueError("length of window or data is None, please given an real value!")
else:
length = len_win * self.sample_rate # number point of a window
# step of split windows
step = length * coverage
start = 0
Splitdata = []
while (len(data) - start >= length):
Splitdata.append(data[int(start):int(start + length)])
start += step
if calculate_to_end and (len(data) - start > 2000):
remain = len(data) - start
start = start - step
step = int(remain / 2000)
start = start + step * 2000
Splitdata.append(data[int(start):int(start + length)])
return np.array(Splitdata), step
elif calculate_to_end:
return np.array(Splitdata), 0
else:
return np.array(Splitdata)
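# Worked example (hypothetical values): with sample_rate=100 (e.g. after down_sample),
# len_win=30 and coverage=1.0 give windows of 3000 samples stepped by 3000 samples (no
# overlap); coverage=0.5 would step by 1500 samples, i.e. 50% overlap between windows.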
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
"""
:param type: Type of Butter. filter, lowpass, bandpass, ...
:param lowcut: Low cutoff frequency
:param highcut: High cutoff frequency
:param order: Order of filter
:return: Signal after filtering
"""
if type == "lowpass": # 低通滤波处理
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "bandpass": # 带通滤波处理
low = low_cut / (self.sample_rate * 0.5)
high = high_cut / (self.sample_rate * 0.5)
b, a = signal.butter(order, [low, high], btype='bandpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "highpass": # 高通滤波处理
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
return signal.filtfilt(b, a, np.array(data))
else:  # the filter type is required
raise ValueError("Please choose a type of filter")
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
"""
:param data: Input signal
:param M: Length of structural element
:return: Signal after filter
"""
if not data.any():
raise ValueError("The input data is None, please given real value data")
g = np.ones(M)
Data_pre = np.insert(data, 0, np.zeros(M))
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
# Opening: erosion followed by dilation
out1 = Eorde(Data_pre, len(Data_pre), g, M)
out2 = Dilate(out1, len(out1), g, M)
out2 = np.insert(out2, 0, np.zeros(M - 2))
# Closing: dilation followed by erosion
out5 = Dilate(Data_pre, len(Data_pre), g, M)
out6 = Eorde(out5, len(out5), g, M)
out6 = np.insert(out6, 0, np.zeros(M - 2))
baseline = (out2 + out6) / 2
# ---------------- subtract the baseline and keep the residual ----------------
data_filtered = Data_pre[:len(baseline)] - baseline
data_filtered = data_filtered[M: M + len(data)]
baseline = baseline[M:]
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
baseline[-1] = baseline[-2] = baseline[-3]
if get_bre:
return data_filtered, baseline
else:
return data_filtered
def Iirnotch(self, data=None, cut_fre=50, quality=3):
"""陷波器"""
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
return signal.filtfilt(b, a, np.array(data))
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
"""
切比雪夫滤波器
:param data: Input signal
:param rp: The maximum ripple allowed
:param type: 'lowpass', 'bandpass, 'highpass'
:param low_cut: Low cut-off fre
:param high_cut: High cut-off fre
:param order: The order of filter
:return: Signal after filter
"""
if type == 'lowpass':
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'bandpass':
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'highpass':
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
else:
raise ValueError("The type of filter is None, please given the real value!")
def Envelope(self, data):
"""取信号包络"""
if len(data) <= 1: raise ValueError("Wrong input data")
hx = fftpack.hilbert(data)
return np.sqrt(hx ** 2, data ** 2)
def wavelet_trans(self, data, c_level=['aaa', 'aad'], wavelet='db4', mode='symmetric', maxlevel=10):
"""Wavelet-packet decomposition, keeping only the listed nodes before reconstruction."""
wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
for c in c_level:
new_wp[c] = wp[c]
return new_wp.reconstruct()
# def em_decomposition(self, data):
# from pyhht.emd import EMD
# return EMD(data).decompose()
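# A minimal usage sketch of the filtering pipeline (synthetic sine + mains hum; illustrative
# parameters only, not the project's actual preprocessing settings):
if __name__ == '__main__':
    fs = 1000
    t = np.arange(0, 10, 1 / fs)
    raw = np.sin(2 * np.pi * 0.3 * t) + 0.1 * np.sin(2 * np.pi * 50 * t)
    op = BCG_Operation(sample_rate=fs)
    sig = op.Iirnotch(raw, cut_fre=50)                          # remove 50 Hz interference
    sig = op.Butterworth(sig, "lowpass", low_cut=20, order=6)   # keep the low-frequency band
    sig_100hz = op.down_sample(sig, down_radio=10)              # 1000 Hz -> 100 Hz
    print(sig_100hz.shape)                                      # (1000,) for this 10 s example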

View File

@ -0,0 +1,84 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:calc_metrics.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/12
"""
import torch
import torchmetrics
class CALC_METRICS:
metrics = []
nc = 0
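# NOTE: `metrics` is a class-level attribute, so every CALC_METRICS instance appends to the
# same shared list; with the single global instance used in main.py this is harmless, but a
# second instance would register the metrics twice.
# The constructor below targets the older torchmetrics API (average=/num_classes=/multiclass=);
# newer torchmetrics releases moved to task-based metrics (e.g. task="binary").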
def __init__(self, nc):
self.nc = nc
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
self.valid_result = self.train_result = None
def update(self, pred, target):
for part1 in self.metrics:
part1.update(pred.cpu(), target.cpu())
def compute(self):
result = []
for part1 in self.metrics:
result.append(part1.compute())
def reset(self):
for part1 in self.metrics:
part1.reset()
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
temp_result = []
for j in self.metrics:
compute_result = (j.compute().cpu().numpy() * 100).tolist()
temp_result.append(compute_result)
if epoch_type == "train":
self.train_result = [loss] + temp_result
elif epoch_type == "valid":
self.valid_result = [loss] + temp_result
else:
pass
a = ""
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
return a
def wandb_log(self, wandb=None, cur_lr=None):
if wandb is None:
return
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
dict_key = []
for epoch_type in ["train", "valid"]:
dict_key.append(epoch_type + "/" + "loss")
for i in keyword:
dict_key.append(epoch_type + "/" + i)
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
log_dict["lr"] = cur_lr
wandb.log(log_dict)
if __name__ == '__main__':
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
pred = torch.tensor(pred).cuda()
true = torch.tensor(true).cuda()
calc_metrics = CALC_METRICS(1)
calc_metrics.update(pred, true)
print(calc_metrics.get_matrix())

View File

@ -0,0 +1,561 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# 14.0
# Manually balance the sample counts
# 13.0
# Restrict to a subset of the recordings for a first test
# 12.0
# Cap segments of unusable events; set unusable segments to background and do not record the event
# 10.0
# Use the signals with poor-quality parts removed
# 9.0
# Added the latest quality labels (not used yet)
# 8.0
# Generate labels for every event type except hypopnea
# A 2 s step was tried: windows shorter than 2 s get dropped and cause more false calls, though covering that remainder could be considered
# Use 30 s windows moved by 1 s; label every apnea as 1, hypopnea as 0, normal as 0
# Preprocessing: a 50 Hz notch filter for power-line interference plus a 20 Hz low-pass filter (the 20 Hz cutoff depends on the BCG spectral range)
# First remove extreme values,
# i.e. values above the upper baseline or below the lower baseline
# type1 average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000
# Too many such segments shift the mean
# TODO
# Add body-movement labels and compute the mean excluding movement
# Finally downsample to 100 Hz
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
from pathos import multiprocessing
from tqdm import tqdm
# Dataset and label locations
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
# BCG recording start times
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
# Collect the path of every recording
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()
# Save path for the generated dataset splits
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)
# Logging setup
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# Header row of the summary statistics output
# logger.info("sampNo".center(8) +
# "hpy_num".center(8) + "hpy_time".center(10) +
# "csa_num".center(8) + "csa_time".center(10) +
# "osa_num".center(8) + "osa_time".center(10) +
# "msa_num".center(8) + "msa_time".center(10)
# )
logger.info("sampNo".center(8) + ',' +
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
)
base_random_seed = 42
window_second = 30
step_second = 1
valid_ratio = 0.1
test_ratio = 0.1
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
valid_ratio > 1 and test_ratio > 1), "validation and test set sizes must both be ratios or both be absolute counts"
# dataset sampNo for test
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# Segments that must be zeroed out; the unusable head is not always zero-padded and can run to tens of thousands of samples
disable_segment = {
'221': [[0, 10000]],
'670': [[0, 20000]],
'683': [[0, 20000]],
'704': [[0, 26000]],
'726': [[0, 20000]],
'736': [[0, 47000]],
'933': [[0, 773560]],
'935': [[0, 26600]],
'952': [[0, 17000]],
'955': [[0, 78000]],
'961': [[0, 107000]],
'962': [[0, 15100]],
'966': [[0, 13120]],
'967': [[0, 44000]],
'1006': [[0, 60000]],
'1009': [[0, 1000]],
'1010': [[0, 49000]],
'1296': [[0, 27000]],
'1300': [[0, 33800]],
'1301': [[0, 14000]],
'1302': [[0, 5600]],
'1374': [[0, 1000]],
'1478': [[0, 998000]],
}
# Main routine for generating the dataset labels
def generate_label(No, dataset_path):
"""
:param dataset_path: 数据集路径
:return:
"""
# Extract the sample number
sampNo = dataset_path.stem.split("samp")[0]
# Label file path
label_path = bcg_label_path / f"export{sampNo}_all.csv"
if not label_path.exists():
raise FileNotFoundError(f"{label_path} not exist")
if not dataset_path.exists():
raise Exception(f"{dataset_path} not exists")
# Load the recording
select_numpy = np.load(dataset_path)
select_numpy_len = len(select_numpy)
# Zero out the unusable segment at the start
if sampNo in disable_segment.keys():
for sp, ep in disable_segment[sampNo]:
select_numpy[sp:ep] = 0
# Drop the poor-quality tail of this recording
if sampNo == "670":
select_numpy = select_numpy[:17195 * 100]
# Measure how much zero padding precedes the signal (converted to seconds below)
not_zero_point = 0
for num in select_numpy:
if num > 10:
break
not_zero_point += 1
not_zero_point //= 100
# Read the labels
label_csv = pd.read_csv(label_path, encoding='gbk')
label_csv["new_start"] = label_csv["new_start"].astype("int")
label_csv["new_end"] = label_csv["new_end"].astype("int")
label_csv["Duration"] = label_csv["Duration"].astype("int")
label_csv["new_label"] = label_csv["new_label"].fillna("2")
label_csv["new_label"] = label_csv["new_label"].astype("int")
# Drop poor-quality samples (disabled)
# drop_csv = label_csv[
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
# label_csv = label_csv.drop(drop_csv.index)
# Event segments and background segments; each background segment spans one window length
segment_labels = []
negative_labels = []
hpy_num = csa_num = osa_num = msa_num = 0
hpy_time = csa_time = osa_time = msa_time = 0
# Iterate over all events and accumulate statistics
for i in range(len(label_csv)):
# Encode the event type as an integer label
label = label_csv.iloc[i, :]
# Skip events that fall inside the zero-padded region
if label["new_end"] < not_zero_point:
continue
if sampNo == "670" and label["new_start"] > 17195:
continue
if label["new_end"] - label["new_start"] < 10:
continue
# Add the event to the event list
if label["Event type"] == "Hypopnea":
label_type = 1
hpy_num += 1
hpy_time += label["new_end"] - label["new_start"]
# Hypopnea is left as background; it probably does not need to be added to the negative events explicitly?
# negative_labels.append(
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
continue
elif label["Event type"] == "Central apnea":
label_type = 2
csa_num += 1
csa_time += label["new_end"] - label["new_start"]
elif label["Event type"] == "Obstructive apnea":
label_type = 3
osa_num += 1
osa_time += label["new_end"] - label["new_start"]
# Treat MSA as OSA
elif label["Event type"] == "Mixed apnea":
label_type = 3
msa_num += 1
msa_time += label["new_end"] - label["new_start"]
else:
continue
# label_type = 0
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
print(sampNo, label)
# Format: sample number, event index, label type, quality label, start second, end second
segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
# logger.info(sampNo.center(8) +
# str(hpy_num).center(8) + str(hpy_time).center(10) +
# str(csa_num).center(8) + str(csa_time).center(10) +
# str(osa_num).center(8) + str(osa_time).center(10) +
# str(msa_num).center(8) + str(msa_time).center(10))
# Set the random seed (base seed + sample number)
random_seed = base_random_seed + int(sampNo)
# For recordings without any event, turn the whole recording into background windows
if len(segment_labels) == 0:
# Skip the zero-padded part (move the start to where the padding ends)
normal_SP = not_zero_point
# The start must leave room for at least one full window
if normal_SP < window_second:
normal_SP = window_second
# End point = total length divided by the sampling rate (seconds)
normal_EP = select_numpy_len // 100
label_type = 0
# Normal segments are numbered as 10000 + second // 30, i.e. the 30 s epoch index
negative_labels += [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1,
SP1 + window_second] for SP1 in
range(normal_SP - window_second + step_second, normal_EP - window_second + step_second,
window_second)]
# For recordings that do contain events,
# walk through the events and collect the background gaps between them
for index in range(len(segment_labels) + 1):
# Background runs from the end of the previous event to the start of the next event
# For the leading event-free stretch the start point is 0
if index == 0:
normal_SP = 0
# Otherwise the start point is the end of the previous event
else:
# Add one second as a buffer
normal_SP = segment_labels[index - 1][-1] + 1
# After the last event, extend to the end of the recording
if index == len(segment_labels):
normal_EP = select_numpy_len // 100 - window_second
# Otherwise the end point is the start of the current event
else:
# Subtract one second as a buffer
normal_EP = segment_labels[index][-2] - 1
# Skip gaps that end inside the leading zero padding
if normal_EP < not_zero_point:
continue
# Skip normal stretches that start before one full window fits
if normal_SP < window_second:
continue
label_type = 0
# Add the background stretch as consecutive window-length segments
temp_1 = [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1, SP1 + window_second]
for SP1 in range(normal_SP - window_second, normal_EP - window_second, window_second)]
negative_labels += temp_1
train_label, valid_label, test_label = [], [], []
# assert (valid_ratio + test_ratio) < 1 <= len(segment_labels), f"{sampNo}训练集与测试集数量应小于总数据集数量{len(segment_labels)}"
# For the selected test recordings, save every sliding window directly
if int(sampNo) in select_dataset:
event_label = np.zeros(select_numpy_len // 100)
quality_label = np.zeros(select_numpy_len // 100)
# Per-second event-label track
for PN, segmentNo, label_type, new_label, SP, EP in segment_labels:
event_label[SP:EP] = label_type
test_label = []
# Skip the zero-padded part
normal_SP = not_zero_point
if normal_SP < window_second:
normal_SP = window_second
normal_EP = select_numpy_len // 100
# Slice into sliding windows of the configured length
test_label += [
[sampNo, SP1 // 30, int(event_label[SP1 + window_second - step_second]),
int(quality_label[SP1 + window_second - step_second]),
SP1, SP1 + window_second] for SP1 in range(normal_SP - window_second + step_second,
normal_EP - window_second + step_second, step_second)]
logger.info(sampNo.center(8) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
',' + str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
)
df2.loc[No] = [sampNo,
str(0), str(0), str(0),
str(0), str(0), str(0),
str(len(test_label)),
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(0), str(0), str(len(segment_labels))]
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
# np.array(test_label))
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
index=False)
train_label, valid_label, test_label = [], [], []
# For the training / validation recordings
if True:
# Shuffle the events and background segments
np.random.seed(random_seed)
np.random.shuffle(segment_labels)
np.random.shuffle(negative_labels)
# Work out how many events go to the training, validation and test splits
if 0 < valid_ratio < 1:
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
valid_segment_num = int(len(segment_labels) * (1 - test_ratio))
else:
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
valid_segment_num = train_segment_num + valid_ratio  # cumulative index, matching how it is used in range(train_segment_num, valid_segment_num) below
# Slice each event into 30 s windows with a 1 s step
for index in range(train_segment_num):
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
train_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(SP - window_second, EP - window_second + step_second, step_second)]
for index in range(train_segment_num, valid_segment_num):
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
valid_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(SP - window_second, EP - window_second + step_second, step_second)]
for index in range(valid_segment_num, len(segment_labels)):
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
test_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(SP - window_second, EP - window_second + step_second, step_second)]
# Count the segments and events per split
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
train_eve, valid_eve, test_eve = train_segment_num, (valid_segment_num - train_segment_num), (
len(segment_labels) - valid_segment_num)
# Dataset padding (disabled)
# if train_num < 300:
# train_num = 300 - train_num
#
# if valid_num < 300:
# valid_num = 300 - valid_num
#
# if test_num < 300:
# test_num = 300 - test_num
# How many background segments go to the training, validation and test splits (disabled)
# if 0 < valid_ratio < 1:
# train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
# valid_eve2 = int(len(negative_labels) * valid_ratio)
# else:
# train_eve2 = len(negative_labels) - valid_ratio - test_ratio
# valid_eve2 = valid_ratio
#
# test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
# Simply top the background events up to a minimum count
train_eve2 = max(train_eve, 300)
valid_eve2 = max(valid_eve, 40)
test_eve2 = max(test_eve, 40)
# Force the background counts (disabled)
# train_eve2 = train_eve
# valid_eve2 = valid_eve
# test_eve2 = test_eve
# Append the background segments to each split
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[:train_eve2]:
label_type = int(label_type)
train_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
in
range(normal_SP, normal_EP, step_second)]
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
train_eve2: train_eve2 + valid_eve2]:
label_type = int(label_type)
valid_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
in
range(normal_SP, normal_EP, step_second)]
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
train_eve2 + valid_eve2:train_eve2 + valid_eve2 + test_eve2]:
label_type = int(label_type)
test_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1 in
range(normal_SP, normal_EP, step_second)]
logger.info(sampNo.center(8) + ',' +
str(len(train_label)).center(10) + ',' +
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
10) + ',' +
str(len(valid_label)).center(10) + ',' +
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
10) + ',' +
str(len(test_label)).center(10) + ',' +
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
10) + ',' +
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
)
df2.loc[No] = [sampNo.center(8),
str(len(train_label)),
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
str(len(valid_label)),
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
str(len(test_label)),
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
str(train_eve), str(valid_eve), str(test_eve).center(10)]
def label_check(label_list):
temp_list = []
for sampNo, index, label_type, new_label, SP, EP in label_list:
if EP - SP < window_second:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if SP < 0:
print(sampNo, index, label_type, SP, EP)
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
for j in temp_list:
label_list.remove(j)
label_check(train_label)
label_check(valid_label)
label_check(test_label)
for sampNo, index, label_type, new_label, SP, EP in train_label:
if EP - SP < window_second:
print(sampNo, index, label_type, new_label, SP, EP)
if SP < 0:
print(sampNo, index, label_type, new_label, SP, EP)
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
df1 = pd.DataFrame(data=train_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
index=False)
df1 = pd.DataFrame(data=valid_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
index=False)
df1 = pd.DataFrame(data=test_label,
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
# np.array(train_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
# np.array(valid_label))
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
# np.array(test_label))
if __name__ == '__main__':
# pool = multiprocessing.Pool(processes=44)
# pool.map(generate_label, list(all_numpy_dataset))
# pool.close()
# pool.join()
df2 = pd.DataFrame(data=None,
columns=["sampNo",
"train_num", "train_P", "train_N",
"valid_num", "valid_P", "valid_N",
"test_num", "test_P", "test_N",
"train_eve", "valid_eve", "test_eve"])
temp = []
for one_dataset in all_numpy_dataset:
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
temp.append(one_dataset)
# for one_dataset in temp:
# all_numpy_dataset.remove(one_dataset)
for No, one_dataset in enumerate(temp):
generate_label(No, one_dataset)
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
# generate_label(all_numpy_dataset[0])
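# Each *_label.csv written above has the columns
#   sampNo, index, label_type, new_label, SP, EP
# where label_type is 0 = normal, 1 = hypopnea, 2 = CSA, 3 = OSA/MSA, new_label is the
# signal-quality flag, and SP/EP are the window start/end in seconds (EP - SP = 30).
# A hypothetical row such as
#   282, 17, 3, 0, 1204, 1234
# would denote a 30 s window of recording 282 overlapping its 17th labelled event (an OSA).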

178
exam/005/load_dataset.py Normal file
View File

@ -0,0 +1,178 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:load_dataset.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2021/12/03
"""
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.Preprocessing import BCG_Operation
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
"""
1. 读取方法
# 无论是否提前切分均提前转成npy格式
# 1.1 提前预处理切分好后生成npy直接载入切分好的片段 内存占用多 读取简单
使用此方法 1.2 提前预处理载入整夜数据切分好后生成csv或xls根据片段读取 内存占用少 读取较为复杂
"""
datasets = {}
# Cache whole-night recordings to avoid re-reading them
def read_dataset(data_path, augment=None):
data_path = Path(data_path)
try:
f = []
if data_path.is_dir():
dataset_list = list(data_path.rglob("*.npy"))
dataset_list.sort()
f += dataset_list
elif data_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{data_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {data_path}: {e} \n')
print("loading dataset")
for i in tqdm(f):
select_dataset = np.load(i)
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
if augment is not None:
select_dataset = augment(select_dataset)
datasets[i.name.split("samp")[0]] = select_dataset
# Read using the second strategy (whole-night arrays indexed per segment)
class ApneaDataset(Dataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
self.data_path = data_path
self.label_path = label_path
self.segment_augment = segment_augment
self.labels = None
self.dataset_type = dataset_type
self.select_sampNo = select_sampno
# self._getAllData()
self._getAllLabels()
def __getitem__(self, index):
# PN patience number
# SP/EP start point, end point
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1))
else:
raise Exception(f'dataset read failure!')
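# Label semantics (see generate_label): label_type 0 = normal, 1 = hypopnea,
# 2 = central apnea, 3 = obstructive / mixed apnea, so the binary training target
# int(float(label_type) > 1) means apnea vs. everything else; new_label is the
# signal-quality flag (2 = poor quality).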
def count_SA(self):
return sum(self.labels[:, 2] > 1)  # column 2 is label_type; apnea windows have label_type > 1
def __len__(self):
return len(self.labels)
def _getAllLabels(self):
label_path = Path(self.label_path)
if not label_path.exists():
raise Exception(f'{self.label_path} does not exist')
try:
f = []
if label_path.is_dir():
if self.dataset_type == "train":
label_list = list(label_path.rglob("*_train_label.csv"))
elif self.dataset_type == "valid":
label_list = list(label_path.rglob("*_valid_label.csv"))
elif self.dataset_type == "test":
label_list = list(label_path.glob("*_sa_test_label.csv"))
# label_list = list(label_path.rglob("*_test_label.npy"))
elif self.dataset_type == "all_test":
label_list = list(label_path.rglob("*_sa_all_label.csv"))
else:
raise ValueError("self.dataset type error")
# label_list = list(label_path.rglob("*_label.npy"))
label_list.sort()
f += label_list
elif label_path.is_file():
raise Exception(f'dataset path should be a dir')
else:
raise Exception(f'{self.label_path} does not exist')
except Exception as e:
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
print("loading labels")
for i in tqdm(f):
if int(i.name.split("_")[0]) not in self.select_sampNo:
continue
if self.labels is None:
self.labels = pd.read_csv(i).to_numpy(dtype=int)
else:
labels = pd.read_csv(i).to_numpy(dtype=int)
if len(labels) > 0:
self.labels = np.concatenate((self.labels, labels))
# self.labels = self.labels[:10000]
print(f"{self.dataset_type} length is {len(self.labels)}")
class TestApneaDataset(ApneaDataset):
def __init__(self, data_path, label_path, dataset_type, select_sampno, segment_augment=None):
super(TestApneaDataset, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
select_sampno=select_sampno,
segment_augment=segment_augment
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]  # label rows carry six fields
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
segment = datasets[str(PN)][int(SP) * 100:int(EP) * 100].copy()
if self.segment_augment is not None:
segment = self.segment_augment(segment)
return segment, int(float(label_type) > 1), PN, segmentNo, SP, EP
else:
raise Exception(f'dataset read failure!')
class TestApneaDataset2(ApneaDataset):
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
super(TestApneaDataset2, self).__init__(
data_path=data_path,
label_path=label_path,
dataset_type=dataset_type,
segment_augment=segment_augment,
select_sampno=select_sampno
)
def __getitem__(self, index):
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
if isinstance(datasets, dict):
dataset = datasets[str(PN)]
segment = self.segment_augment(dataset, SP, EP)
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
else:
raise Exception(f'dataset read failure!')
if __name__ == '__main__':
pass

284
exam/005/main.py Normal file
View File

@ -0,0 +1,284 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@time:2021/10/15
"""
import os
import yaml
import logging
from pathlib import Path
import time
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torch.cuda
from tqdm import tqdm
from torchinfo import summary
from load_dataset import ApneaDataset, read_dataset
from torch import nn
from utils.calc_metrics import CALC_METRICS
from sklearn.model_selection import KFold
from model.Hybrid_Net001 import HYBRIDNET001
# from utils.LossFunction import Foca1lLoss
from my_augment import my_augment, my_segment_augment
# Load configuration
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader)
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
os.environ["WANDB_MODE"] = "dryrun"
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# Read path parameters
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
save_dir.mkdir(parents=True, exist_ok=True)
# Logging setup
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
fh.setLevel(logging.NOTSET)
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
logger.addHandler(ch)
logging.getLogger('matplotlib.font_manager').disabled = True
logger.info("------------------------------------")
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
# Back up the configuration
with open(save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# Hyper-parameters
gpu = torch.cuda.is_available()
epochs = hyp["epoch"]
lr = hyp["lr"]
nc = hyp["nc"]
bs = hyp["batch_size"]
worker = hyp["number_worker"]
select_sampno = hyp["select_sampno"]
read_dataset(data_path, augment=my_augment)
calc_metrics = CALC_METRICS(nc)
# Start training
# Training loop
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
model.train()
train_loss = 0.0
optimizer.zero_grad()
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
pbar.set_description(training_state)
for i, (resp, stft, labels) in pbar:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
# So the single-channel input would be [batch_size, 1, 3000]: 3000 = 30 s * 100 Hz
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = out.squeeze(-1)
loss = loss_func(out, labels.float())
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Argument passed when using cosine annealing:
# scheduler.step(epoch + i / len(train_loader.dataset))
# Argument passed when using ReduceLROnPlateau
scheduler.step(loss)
loss_value = loss.item()
train_loss += loss_value
# cur_lr = optimizer.param_groups[-1]['lr']
# labels = torch.unsqueeze(labels, dim=1)
# out = F.softmax(out, dim=1)
# out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# if i % 20 == 0:
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
cur_lr = optimizer.param_groups[-1]['lr']
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
def model_valid(model, valid_loader, wdir, loss_func):
model.eval()
valid_loss = 0.0
for resp, stft, labels in valid_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = out.squeeze(-1)
# out = F.softmax(out, dim=1)
loss = loss_func(out, labels.float())
valid_loss += loss.item()
# labels = torch.unsqueeze(labels, dim=1)
# out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
valid_loss /= len(valid_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
global best_f1
valid_f1 = calc_metrics.metrics[-1].compute()
if valid_f1 > best_f1:
best_f1 = valid_f1
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
torch.save(model.state_dict(), wdir / f"best.pt")
if wandb is not None:
wandb.run.summary["best_f1"] = valid_f1
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for resp, stft, labels in test_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = out.squeeze(-1)
# out = F.softmax(out, dim=1)
loss = loss_func(out, labels.float())
test_loss += loss.item()
# labels = torch.unsqueeze(labels, dim=1)
# out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
import wandb
except ImportError:
wandb = None
prefix = 'wandb: '
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
if wandb is not None and wandb.run is None:
wandb_run = wandb.init(
config=hyp,
name=save_dir.stem,
project=hyp["project"],
notes=hyp["Note"],
tags=hyp["tags"],
entity=hyp["entity"],
)
exam_name = Path("./").absolute().name
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
summary(model_net, [(1, 120, 1), (1, 1, 121, 26)])
time.sleep(3)
if gpu:
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
train_set = [select_sampno[i] for i in train_ids]
test_set = [select_sampno[i] for i in test_ids]
logger.info(f'Train_Set:{train_set}')
logger.info(f'Independent_Test_Set:{test_set}')
sub_save_dir = save_dir / f"KFold_{fold}"
sub_save_dir.mkdir(exist_ok=True, parents=True)
wdir = sub_save_dir / "weights"
wdir.mkdir(exist_ok=True, parents=True)
hyp["train_set"] = train_set
hyp["test_set"] = test_set
with open(sub_save_dir / 'settings.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# Re-initialize the model for this fold
del model_net
model_net = eval(hyp["model_name"])()
model_net.initialize_weights()
if gpu:
model_net.cuda()
logger.info(f"Weight is {[(len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]}")
# Loss function and optimizer
loss_function = nn.BCELoss(
weight=torch.Tensor([(len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]).cuda())
# loss_func = nn.BCEWithLogitsLoss()
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
# momentum
# nesterov: Nesterov momentum
# weight_decay: L2 regularization
# optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=2836, min_lr=1e-8,
verbose=True)
# Bookkeeping: best validation F1 so far
best_f1 = 0
for epoch in range(epochs):
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -0,0 +1,98 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:andrew
@file:Hybrid_Net001.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/30
"""
import os
import torch
from torch import nn
from torchinfo import summary
from torch import cat
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Input duration in seconds
WHOLE_SEGMENT_SECOND = 30
# Respiration sampling rate (Hz)
RESPIRATORY_FRE = 4
# BCG spectrogram size (frequency bins, time frames)
BCG_GRAPH_SIZE = (26, 121)
class HYBRIDNET001(nn.Module):
def __init__(self, num_classes=1, init_weights=True):
super(HYBRIDNET001, self).__init__()
self.lstm = nn.LSTM(input_size=1,
hidden_size=16,
num_layers=1,
bidirectional=True,
batch_first=True)
self.right = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3),
stride=(1, 1), padding=(1, 1)),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
nn.BatchNorm2d(16),
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3),
stride=(1, 1), padding=(1, 1)),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
nn.BatchNorm2d(32),
nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3),
stride=(1, 1), padding=(1, 1)),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
nn.BatchNorm2d(32)
)
self.classifier = nn.Sequential(
# nn.Dropout(p=0.5),
nn.Linear((120 * 32 + 32 * 16 * 4), 512),
nn.ReLU(inplace=True),
nn.Linear(512, num_classes),
nn.Sigmoid()
)
if init_weights:
self.initialize_weights()
def initialize_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # He (Kaiming) initialization
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)  # normal-distribution initialization
nn.init.constant_(m.bias, 0)
def forward(self, x1, x2):
x1, (_, _) = self.lstm(x1)
x2 = self.right(x2)
# print(x1.shape)
# print(x2.shape)
x1 = torch.flatten(x1, start_dim=1)
x2 = torch.flatten(x2, start_dim=1)
x = torch.cat((x1, x2), dim=1)
x = self.classifier(x)
return x
if __name__ == '__main__':
model = HYBRIDNET001(1).cuda()
summary(model, [(32, 120, 1), (32, 1, 121, 26)])
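# Shape walk-through for one 30 s window as produced by my_segment_augment (exam/005):
#   resp branch: (batch, 120, 1) -> bidirectional LSTM(hidden=16) -> (batch, 120, 32) -> flatten 3840
#   stft branch: (batch, 1, 121, 26) -> three conv + stride-2 pooling stages -> (batch, 32, 16, 4) -> flatten 2048
#   concatenated: 3840 + 2048 = 5888 = 120 * 32 + 32 * 16 * 4, matching the first Linear layer above.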

51
exam/005/my_augment.py Normal file
View File

@ -0,0 +1,51 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:my_augment.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/07/26
"""
from utils.Preprocessing import BCG_Operation
import numpy as np
from scipy.signal import stft
preprocessing = BCG_Operation()
preprocessing.sample_rate = 100
def my_augment(dataset):
dataset = dataset - dataset.mean()
dataset = preprocessing.Iirnotch(dataset)
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.7, order=6)
dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
dataset = {"low": dataset_low,
"high": dataset_high}
return dataset
def get_stft(x, fs, n):
print(len(x))
f, t, amp = stft(x, fs, nperseg=n)
z = np.abs(amp.copy())
return f, t, z
def my_segment_augment(dataset, SP, EP):
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
dataset_low = dataset_low[::25]
dataset_low = dataset_low.reshape(dataset_low.shape[0], 1)
_, _, dataset_high = stft(dataset_high, 100, nperseg=50)
dataset_high = dataset_high.real.astype(np.float32).T  # STFT output is complex; keep its real part (np.float is deprecated)
dataset_high = dataset_high.reshape(1, dataset_high.shape[0], dataset_high.shape[1])
return dataset_low, dataset_high
if __name__ == '__main__':
pass
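# A minimal shape-check sketch (synthetic signal, illustrative values only):
#   raw = np.random.randn(100 * 600)                              # 10 minutes at 100 Hz
#   processed = my_augment(raw)                                   # {"low": ..., "high": ...}
#   resp, stft_img = my_segment_augment(processed, SP=60, EP=90)  # one 30 s window
#   resp.shape     -> (120, 1)       low-pass branch decimated by 25 (4 Hz respiration proxy)
#   stft_img.shape -> (1, 121, 26)   STFT of the high-pass branch (fs=100, nperseg=50)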

42
exam/005/settings.yaml Normal file
View File

@ -0,0 +1,42 @@
# environment config
GPU: "1"
# dataset config
Path:
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
label: ./dataset/
save: ./output/
batch_size: 256
number_worker: 0
model_name: HYBRIDNET001
select_sampno:
- 282
- 726
- 229
- 582
- 286
- 966
- 1000
- 1004
- 1006
- 1009
# train hyperparameters config
epoch: 50
lr: 0.00001
nc: 1
# wandb config
entity: "marques"
project: "Sleep_Apnea_HYBRID00X"
Note: "HYBRID001 NOBD STFT RESP DUAL"
tags: ["ReduceLROnPlateau", "RESP LSTM", "STFT CNN"]
# "CW":class_weight
# "CosineAnnealingLR"
# "ReduceLROnPlateau"

View File

@ -0,0 +1,454 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:test_analysis.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/21
"""
import os
import sys
import pandas as pd
import torch.cuda
import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import tqdm
from pathlib import Path
from torch.nn import functional as F
from torch.utils.data import DataLoader
from load_dataset import TestApneaDataset2, read_dataset
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
from torch import nn
from utils.calc_metrics import CALC_METRICS
from my_augment import my_augment, my_segment_augment
from model.Hybrid_Net001 import HYBRIDNET001
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK (Chinese) labels correctly
exam_path = Path("./output/")
# Confidence threshold on the predicted probability
thresh = 0.5
# Minimum gap (s) below which adjacent predicted events are merged
thresh_event_interval = 2
# Minimum event length (s)
thresh_event_length = 8
# label_type values above this count as apnea
event_thresh = 1
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
severity_label = {"all": "none"}
severity_df = pd.read_excel(severity_path)
for one_data in severity_df.index:
one_data = severity_df.loc[one_data]
severity_label[str(one_data["数据编号"])] = one_data["程度"]
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
gpu = torch.cuda.is_available()
num_classes = 1
calc_metrics = CALC_METRICS(num_classes)
with open("./settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
read_dataset(data_path, augment=my_augment)
del hyp
# By default the most recent output folder is used
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
train_set, test_set = None, None
loss_func = nn.CrossEntropyLoss()
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
def set_environment(i):
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
model, model_name, train_set, test_set
output_path = all_output_path[i]
print(output_path)
segments_results_save_path = (output_path / "segments_results")
segments_results_save_path.mkdir(exist_ok=True)
events_results_save_path = (output_path / "events_results")
events_results_save_path.mkdir(exist_ok=True)
# Load the run configuration
with open(output_path / "settings.yaml") as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
data_path = hyp["Path"]["dataset"]
label_path = hyp["Path"]["label"]
train_set = hyp["train_set"]
test_set = hyp["test_set"]
model_path = output_path / "weights" / "best.pt"
model = eval(hyp["model_name"])()
model_name = hyp["model_name"]
model.load_state_dict(torch.load(model_path))
model.cuda()
model.eval()
def test_and_analysis_and_visual(dataset_type):
if dataset_type == "test":
sampNo = train_set
elif dataset_type == "all_test":
sampNo = test_set
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
segment_augment=my_segment_augment)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
test_loss = 0.0
df_segment = pd.DataFrame(columns=columns)
for one in tqdm(test_loader, total=len(test_loader)):
resp, stft, labels = one[:3]
other_info = one[3:]
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
out = model(resp, stft)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = F.softmax(out, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
# one[0] = list(one[0].cpu().numpy())
# one[1] = list(one[1].cpu().numpy())
# one = one[1:]
# out = out.view(1, -1).cpu().numpy().tolist()
# one += out
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
one2 = one2.transpose((1, 0))
df = pd.DataFrame(data=one2, columns=columns)
df_segment = df_segment.append(df, ignore_index=True)
test_loss /= len(test_loader)
calc_metrics.compute()
print(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
calc_metrics.reset()
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
# segment-level analysis
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
# draw confusion matrices
# one per sample
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
# draw bar charts
# event-level analysis
# for inner_test, every segment number is one event
# for the full-night independence_test, events have to be computed separately
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
# drop segments whose signal quality is poor
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
df_select_segment = df_segment.drop(df_bad_segment.index)
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
dataset_type)
df_select_event = segment_to_event(df_select_segment, dataset_type)
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
is_event=True)
def analysis_results(df_result, base_path, dataset_type, is_event=False):
if df_result.empty:
print(base_path, dataset_type, "is_empty")
return None
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
all_sampNo = df_result["sampNo"].unique()
df_metrics = pd.DataFrame(columns=columns2)
df_metrics.loc[0] = 0
df_metrics.loc[0]["sampNo"] = dataset_type
for index, sampNo in enumerate(all_sampNo):
df = df_result[df_result["sampNo"] == sampNo]
df.to_csv(
base_path / dataset_type /
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
index=False)
df_metrics.loc[index + 1] = np.NAN
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
df_metrics.loc[index + 1]["support"] = df.shape[0]
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# continue
# else:
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
df_metrics.loc[index + 1]["TP"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["FP"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
df_metrics.loc[index + 1]["TN"] = \
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[index + 1]["FN"] = \
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
col] != 0 else np.NAN
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
df_metrics.loc[index + 1][col]
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
# return None
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
# within inner_test, also aggregate the metrics by severity level
if dataset_type == "test":
all_severity = ["正常", "轻度", "中度", "重度"]  # normal / mild / moderate / severe, values as stored in the severity spreadsheet
for index, severity in enumerate(all_severity):
df_event = df_metrics[df_metrics["severity"] == severity]
df_temp = pd.DataFrame(columns=columns2)
df_temp.loc[0] = 0
df_temp.loc[0]["sampNo"] = severity
df_temp.loc[0]["severity"] = str(index + 1)
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
df_temp.loc[0]["T"] += df_event["T"].sum()
df_temp.loc[0]["F"] += df_event["F"].sum()
df_temp.loc[0]["TP"] += df_event["TP"].sum()
df_temp.loc[0]["FP"] += df_event["FP"].sum()
df_temp.loc[0]["TN"] += df_event["TN"].sum()
df_temp.loc[0]["FN"] += df_event["FN"].sum()
df_temp.loc[0]["support"] += df_event["support"].sum()
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
"spec", "pre", "NPV", "F1score"]:
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
df_metrics = df_metrics.append(df_temp, ignore_index=True)
df_backup = df_metrics
df_metrics = df_metrics.astype("str")
df_metrics = df_metrics.sort_values("severity")
df_metrics.to_csv(base_path / dataset_type /
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
index=False, encoding="gbk")
return df_backup
def confusionMatrix(df_analysis, base_path, dataset_type):
if df_analysis is None:
print(base_path, dataset_type, "is None")
return
if df_analysis.empty:
print(base_path, dataset_type, "is_empty")
return
classes = ["normal", "SA"]
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
for one_samp in df_analysis.index:
one_samp = df_analysis.loc[one_samp]
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
def segment_to_event(df_segment, dataset_type):
df_all_event = pd.DataFrame(columns=columns)
all_sampNo = df_segment["sampNo"].unique()
if dataset_type == "test":
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
all_segments_no = df["segmentNo"].unique()
for index_se, segment_No in enumerate(all_segments_no):
df_temp = df[df["segmentNo"] == segment_No].copy()
SP = df_temp.iloc[0]["EP"]
EP = df_temp.iloc[-1]["EP"] + 1
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
df_temp.iloc[0]["new_label"], SP, EP, 0]
thresh_Pred = df_temp["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# dilation: extend positive predictions forward to bridge short gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# run-length encode and keep runs that are long enough
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for i in value_ar:
if i > thresh_event_length:
df_event.loc[index_se, "pred"] = 1  # .loc avoids chained-indexing assignment that may not write through
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
# encoding="gbk")
df_all_event = df_all_event.append(df_event, ignore_index=True)
else:
for index, sampNo in enumerate(all_sampNo):
df_event = pd.DataFrame(columns=columns)
df = df_segment[df_segment["sampNo"] == sampNo].copy()
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
thresh_Pred = df["thresh_Pred"].values
thresh_Pred2 = thresh_Pred.copy()
# dilation: extend positive predictions forward to bridge short gaps
for index_pred, pred in enumerate(thresh_Pred):
if pred == 0:
continue
for interval in range(1, thresh_event_interval):
if pred == 1 and index_pred + interval < thresh_Pred.size:
thresh_Pred2[index_pred + interval] = 1
else:
continue
# run-length encode and keep runs that are long enough
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]
count_ar = np.diff(index_ar)
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
for value_index, value in enumerate(value_ar):
SP = index_ar[value_index]
EP = index_ar[value_index] + count_ar[value_index]
# TP, FP
if value > thresh_event_length:
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
label_type = df["label_type"][SP:EP].max()
new_label = df["new_label"][SP:EP].max()
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
SP, EP, thresh_Pred2[SP]]], columns=columns),
ignore_index=True)
if value > 30:
print([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
# run too short to count as an event
else:
df["thresh_Pred"][SP:EP] = 0
# count segments without any predicted event as negatives
for segment_no in df["segmentNo"].unique():
df_temp = df[df["segmentNo"] == segment_no]
if df_temp["thresh_Pred"].sum() > 0:
continue
df_event = df_event.append(pd.DataFrame(
[[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
(segment_no + 1) * 30, 0]], columns=columns),
ignore_index=True)
df_all_event = df_all_event.append(df_event, ignore_index=True)
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
return df_all_event
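
The dilation/run-length step above is the core of the segment-to-event conversion. The following standalone toy sketch (not part of this file; the sample array and the threshold of 3 are made up) shows what the np.concatenate / np.where / np.diff idiom computes.

import numpy as np

pred = np.array([0, 1, 1, 0, 1, 1, 1, 1, 0, 0])                # per-window binary predictions
same_ar = np.concatenate(([True], pred[:-1] != pred[1:], [True]))
index_ar = np.where(same_ar)[0]                                 # start index of every run (+ end sentinel)
count_ar = np.diff(index_ar)                                    # length of every run
value_ar = pred[same_ar[:-1]] * count_ar                        # run length for runs of 1s, 0 for runs of 0s
for start, length, value in zip(index_ar, count_ar, value_ar):
    if value > 3:                                               # stand-in for thresh_event_length
        print(f"predicted event: windows {start} .. {start + length}")  # -> windows 4 .. 8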
# save results per sampNo and visualize them without overlap
# inner_test
# per sampNo, save the segments that disagree with the labels separately and visualize them without overlap
# import shap
# explainer = shap.TreeExplainer()
# shap_values = explainer.shap_values()
if __name__ == '__main__':
all_output_path = list(exam_path.rglob("KFold_*"))
for exam_index, test_exam_path in enumerate(all_output_path):
# test_exam_path = exam_path / test_exam_path
set_environment(exam_index)
test_and_analysis_and_visual(dataset_type="test")
test_and_analysis_and_visual(dataset_type="all_test")

View File

@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:Draw_ConfusionMatrix.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/08/10
"""
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese (SimHei) labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
ax.set_ylim(len(classes) - 0.5, -0.5)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
normalize = False
fmt = '.2f' if normalize else 'd'
thresh = cm.max() * 0.8
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig_cm.tight_layout()
fig_cm.savefig(save_path)
plt.close()
#
if __name__ == '__main__':
pass
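
A hedged usage sketch (not part of the commit): calling draw_confusionMatrix the way test_analysis.py does, with made-up counts and an illustrative output path.

import numpy as np
from pathlib import Path
from utils.Draw_ConfusionMatrix import draw_confusionMatrix

cm = np.array([[90, 10],   # [[TN, FP],
               [5, 45]])   #  [FN, TP]]
draw_confusionMatrix(cm, classes=["normal", "SA"], title="demo 1006",
                     save_path=Path("./confusion_demo.jpg"))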

View File

@ -0,0 +1,181 @@
# encoding:utf-8
"""
@ date: 2020-09-16
@ author: jingxian
@ illustration: Pre-processing
"""
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from scipy import fftpack
def Dilate(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
return returndata
def Eorde(x, N, g, M):
returndata = np.array([])
for num in range(N - M + 1):
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
return returndata
def fin_turn(data, peak):
if len(data) == 0 or len(peak) == 0: return peak
return_peak = []
for p in peak:
minx, maxx = max(0, p - 100), min(len(data), p + 100)
return_peak.append(minx + np.argmax(data[minx: maxx]))
return return_peak
class BCG_Operation():
def __init__(self, sample_rate=1000):
self.sample_rate = sample_rate
def down_sample(self, data=None, down_radio=10):
if data is None:
raise ValueError("data is None, please given an real value!")
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
self.sample_rate = self.sample_rate / down_radio
return data
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
"""
分窗
:param len_win: length of window
:return: signal windows
"""
if (len_win is None) or (data is None):
raise ValueError("length of window or data is None, please given an real value!")
else:
length = len_win * self.sample_rate # number point of a window
# step of split windows
step = length * coverage
start = 0
Splitdata = []
while (len(data) - start >= length):
Splitdata.append(data[int(start):int(start + length)])
start += step
if calculate_to_end and (len(data) - start > 2000):
remain = len(data) - start
start = start - step
step = int(remain / 2000)
start = start + step * 2000
Splitdata.append(data[int(start):int(start + length)])
return np.array(Splitdata), step
elif calculate_to_end:
return np.array(Splitdata), 0
else:
return np.array(Splitdata)
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
"""
:param type: Butterworth filter type: 'lowpass', 'bandpass' or 'highpass'
:param low_cut: Low cutoff frequency
:param high_cut: High cutoff frequency
:param order: Order of the filter
:return: Signal after filtering
"""
if type == "lowpass": # 低通滤波处理
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "bandpass": # 带通滤波处理
low = low_cut / (self.sample_rate * 0.5)
high = high_cut / (self.sample_rate * 0.5)
b, a = signal.butter(order, [low, high], btype='bandpass')
return signal.filtfilt(b, a, np.array(data))
elif type == "highpass": # 高通滤波处理
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
return signal.filtfilt(b, a, np.array(data))
else:  # a valid filter type must be specified
raise ValueError("Please choose a type of filter")
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
"""
:param data: Input signal
:param M: Length of structural element
:return: Signal after filter
"""
if not data.any():
raise ValueError("The input data is None, please given real value data")
g = np.ones(M)
Data_pre = np.insert(data, 0, np.zeros(M))
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
# Opening: erosion followed by dilation
out1 = Eorde(Data_pre, len(Data_pre), g, M)
out2 = Dilate(out1, len(out1), g, M)
out2 = np.insert(out2, 0, np.zeros(M - 2))
# Closing: dilation followed by erosion
out5 = Dilate(Data_pre, len(Data_pre), g, M)
out6 = Eorde(out5, len(out5), g, M)
out6 = np.insert(out6, 0, np.zeros(M - 2))
baseline = (out2 + out6) / 2
# ------------------------- keep the residue (signal minus baseline) ------------------------
data_filtered = Data_pre[:len(baseline)] - baseline
data_filtered = data_filtered[M: M + len(data)]
baseline = baseline[M:]
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
baseline[-1] = baseline[-2] = baseline[-3]
if get_bre:
return data_filtered, baseline
else:
return data_filtered
def Iirnotch(self, data=None, cut_fre=50, quality=3):
"""陷波器"""
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
return signal.filtfilt(b, a, np.array(data))
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
"""
Chebyshev type-I filter
:param data: Input signal
:param rp: The maximum ripple allowed
:param type: 'lowpass', 'bandpass', 'highpass'
:param low_cut: Low cut-off frequency
:param high_cut: High cut-off frequency
:param order: The order of filter
:return: Signal after filter
"""
if type == 'lowpass':
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'bandpass':
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
elif type == 'highpass':
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
return signal.filtfilt(b, a, np.array(data))
else:
raise ValueError("The type of filter is None, please given the real value!")
def Envelope(self, data):
"""取信号包络"""
if len(data) <= 1: raise ValueError("Wrong input data")
hx = fftpack.hilbert(data)
return np.sqrt(hx ** 2 + data ** 2)
def wavelet_trans(self, data, c_level=['aaa', 'aad'], wavelet='db4', mode='symmetric', maxlevel=10):
wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
for c in c_level:
new_wp[c] = wp[c]
return new_wp.reconstruct()
# def em_decomposition(self, data):
# from pyhht.emd import EMD
# return EMD(data).decompose()
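
A usage sketch under stated assumptions (not part of the commit): a typical chain of the operations defined above on a synthetic 1000 Hz signal. The import path and the cut-off frequencies are illustrative, not taken from the training code.

import numpy as np
# from preprocessing import BCG_Operation   # adjust to the actual module name of the file above

raw = np.random.randn(60 * 1000)                       # one minute of synthetic 1000 Hz signal
op = BCG_Operation(sample_rate=1000)
sig = op.Iirnotch(raw, cut_fre=50)                     # notch out 50 Hz interference while still at 1000 Hz
sig = op.down_sample(sig, down_radio=10)               # -> 100 Hz; op.sample_rate is updated internally
resp = op.Butterworth(sig, type="lowpass", low_cut=0.7, order=4)                # respiration band
bcg = op.Butterworth(sig, type="bandpass", low_cut=2.0, high_cut=8.0, order=4)  # heartbeat (BCG) band
windows = op.Splitwin(resp, len_win=30, coverage=1.0)  # 30 s non-overlapping windows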

View File

@ -0,0 +1,84 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:calc_metrics.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/02/12
"""
import torch
import torchmetrics
class CALC_METRICS:
metrics = []
nc = 0
def __init__(self, nc):
self.nc = nc
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
self.valid_result = self.train_result = None
def update(self, pred, target):
for part1 in self.metrics:
part1.update(pred.cpu(), target.cpu())
def compute(self):
result = []
for part1 in self.metrics:
result.append(part1.compute())
def reset(self):
for part1 in self.metrics:
part1.reset()
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
temp_result = []
for j in self.metrics:
compute_result = (j.compute().cpu().numpy() * 100).tolist()
temp_result.append(compute_result)
if epoch_type == "train":
self.train_result = [loss] + temp_result
elif epoch_type == "valid":
self.valid_result = [loss] + temp_result
else:
pass
a = ""
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
return a
def wandb_log(self, wandb=None, cur_lr=None):
if wandb is None:
return
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
dict_key = []
for epoch_type in ["train", "valid"]:
dict_key.append(epoch_type + "/" + "loss")
for i in keyword:
dict_key.append(epoch_type + "/" + i)
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
log_dict["lr"] = cur_lr
wandb.log(log_dict)
if __name__ == '__main__':
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
pred = torch.tensor(pred).cuda()
true = torch.tensor(true).cuda()
calc_metrics = CALC_METRICS(1)
calc_metrics.update(pred, true)
print(calc_metrics.get_matrix())
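
A hedged sketch (not part of the commit) of the per-epoch cycle the training scripts follow with this class; the batch sizes and values below are made up.

import torch
from utils.calc_metrics import CALC_METRICS

calc_metrics = CALC_METRICS(1)
for _ in range(3):                          # stand-in for the batch loop
    pred = torch.rand(8, 1)                 # positive-class probabilities
    target = torch.randint(0, 2, (8, 1))    # binary labels
    calc_metrics.update(pred, target)
print(calc_metrics.get_matrix(loss=0.42, epoch=0, epoch_type="train", cur_lr=1e-5))
calc_metrics.reset()                        # clear accumulated state before the next epoch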

View File

@ -121,6 +121,8 @@ def model_train(model, train_loader, optimizer, scheduler, loss_func, training_s
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
@ -162,6 +164,34 @@ def model_valid(model, valid_loader, wdir, loss_func):
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for resp, stft, labels in test_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
@ -191,7 +221,7 @@ if __name__ == '__main__':
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
@ -213,9 +243,11 @@ if __name__ == '__main__':
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# re-initialize the model
del model_net
@ -251,5 +283,6 @@ if __name__ == '__main__':
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -24,8 +24,8 @@ select_sampno:
- 1009
# train hyperparameters config
epoch: 20
lr: 0.0001
epoch: 50
lr: 0.00001
nc: 1
# wandb config

View File

@ -121,6 +121,8 @@ def model_train(model, train_loader, optimizer, scheduler, loss_func, training_s
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
@ -162,6 +164,34 @@ def model_valid(model, valid_loader, wdir, loss_func):
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for resp, stft, labels in test_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
@ -191,7 +221,7 @@ if __name__ == '__main__':
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
@ -213,9 +243,11 @@ if __name__ == '__main__':
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# re-initialize the model
del model_net
@ -251,5 +283,6 @@ if __name__ == '__main__':
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -24,8 +24,8 @@ select_sampno:
- 1009
# train hyperparameters config
epoch: 20
lr: 0.0001
epoch: 50
lr: 0.00001
nc: 1
# wandb config

View File

@ -121,6 +121,8 @@ def model_train(model, train_loader, optimizer, scheduler, loss_func, training_s
train_loss /= len(train_loader)
calc_metrics.compute()
logger.info("")
logger.info("--------------------------------------")
logger.info(training_state)
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
calc_metrics.reset()
@ -162,6 +164,34 @@ def model_valid(model, valid_loader, wdir, loss_func):
calc_metrics.reset()
def model_test(model, test_loader, loss_func):
model.eval()
test_loss = 0.0
for resp, stft, labels in test_loader:
resp = resp.float().cuda() if gpu else resp.float()
stft = stft.float().cuda() if gpu else stft.float()
labels = labels.cuda() if gpu else labels
with torch.no_grad():
# segments = F.normalize(segments)
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# segments = segments.view(len(segments), 1, -1)
out = model(resp, stft)
out = F.softmax(out, dim=1)
loss = loss_func(out, labels)
test_loss += loss.item()
labels = torch.unsqueeze(labels, dim=1)
out = torch.unsqueeze(out[:, 1], dim=1)
calc_metrics.update(out.cpu(), labels.cpu())
test_loss /= len(test_loader)
calc_metrics.compute()
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
calc_metrics.reset()
if __name__ == '__main__':
try:
@ -191,7 +221,7 @@ if __name__ == '__main__':
model_net.cuda()
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
logger.info('--------------------------------')
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
@ -213,9 +243,11 @@ if __name__ == '__main__':
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
# re-initialize the model
del model_net
@ -251,5 +283,6 @@ if __name__ == '__main__':
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
model_valid(model_net, valid_loader, wdir, loss_function)
model_test(model_net, test_loader, loss_function)
if wandb is not None:
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])

View File

@ -1,5 +1,5 @@
# environment config
GPU: "0"
GPU: "1"
# dataset config
@ -24,8 +24,8 @@ select_sampno:
- 1009
# train hyperparameters config
epoch: 20
lr: 0.0001
epoch: 50
lr: 0.00001
nc: 1
# wandb config

View File

@ -4,13 +4,12 @@
# Initial learning-rate exploration
-------------------------------------------------
000
learning rate 1e-4, 20 epochs: gets the job done
learning rate 1e-4, 20 epochs: overfits too early (around epoch 6)
001
learning rate 1e-5, 50 epochs: results close to 000, so 000's parameters were kept
learning rate 1e-5, 50 epochs: works
002
learning rate 1e-6, 50 epochs: converges too slowly
learning rate 1e-6, 50 epochs: a reasonable fit, just too slow
# precision is what mainly drags down F1
-------------------------------------------------
@ -20,7 +19,23 @@
apart from subtracting the mean, no other preprocessing of the dataset
--------------------------------------------------------
003
learning rate 1e-5; from the current results a full-night dataset also works, but Cheyne-Stokes respiration data has not been added yet
learning rate 1e-5, 50 epochs
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction no other preprocessing; changed from keeping the positive-class weight at 1 to keeping the negative-class weight at 1 (see the loss-weighting sketch below)
--------------------------------------------------------
004
learning rate 1e-5, 50 epochs
--------------------------------------------------------
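
A minimal sketch (an assumption, not shown in this commit) of how a class_weight of 13.5 with the negative class kept at 1 could be passed to CrossEntropyLoss.

import torch
from torch import nn

# weight[0] = negative class kept at 1, weight[1] = positive class up-weighted by 13.5
loss_function = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 13.5]))
logits = torch.randn(4, 2)              # two-logit output, one row per segment
labels = torch.tensor([0, 1, 0, 1])
loss = loss_function(logits, labels)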
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction no other preprocessing; changed to a single-output head
--------------------------------------------------------
005
learning rate 1e-5, 50 epochs
--------------------------------------------------------
@ -31,7 +46,7 @@
apart from mean subtraction, clip the signal at 600 and normalize
--------------------------------------------------------
011
learning rate 1e-5
learning rate 1e-5, 50 epochs
--------------------------------------------------------
--------------------------------------------------------
@ -39,7 +54,7 @@
apart from mean subtraction, z-score the dataset using the whole-night mixed signal
--------------------------------------------------------
012
learning rate 1e-4, 20 epochs
learning rate 1e-5, 50 epochs
--------------------------------------------------------
@ -48,34 +63,89 @@
apart from mean subtraction, z-score each segment using its own mixed signal
--------------------------------------------------------
013
learning rate 1e-4, 20 epochs
learning rate 1e-5, 50 epochs
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, rescale after gross body-movement detection
--------------------------------------------------------
014
learning rate 1e-4, 20 epochs
learning rate 1e-5, 20 epochs TODO
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset with only good-quality data, class_weight 13.5 to handle the class imbalance,
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
# change the activation function of the convolutional layers
--------------------------------------------------------
021
learning rate 1e-4, leave this unimplemented for now
learning rate 1e-5, 20 epochs TODO
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
# introduce residual blocks
--------------------------------------------------------
022
learning rate 1e-5, 20 epochs TODO
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
# change the conv activation; use a CNN (ResNet?) for RESP and an LSTM for STFT
--------------------------------------------------------
023
learning rate 1e-5, 20 epochs TODO
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
# change the conv activation; CNN (ResNet?) for RESP, LSTM for STFT, then a further CNN on the two combined outputs
--------------------------------------------------------
024
learning rate 1e-5, 20 epochs TODO
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
# change the conv activation; CNN (ResNet?) for RESP, LSTM for STFT, then a further CNN on the two combined outputs
--------------------------------------------------------
025
learning rate 1e-5, 20 epochs TODO
--------------------------------------------------------
##
TODO: analyse results on good-quality signals; analyse by severity level
--------------------------------------------------------
**Full-scale** training on the non-full-night dataset, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
--------------------------------------------------------
022
031
learning rate 1e-4
--------------------------------------------------------
--------------------------------------------------------
Training on the non-full-night dataset with only good-quality data, class_weight 13.5 to handle the class imbalance,
apart from mean subtraction, use the best preprocessing found above
--------------------------------------------------------
0xx
learning rate 1e-4, leave this unimplemented for now
--------------------------------------------------------