Add experiments

This commit is contained in:
parent 8bdc1f736d
commit 8209b13e08

2 .gitignore vendored
@@ -138,3 +138,5 @@ dmypy.json
# Cython debug symbols
cython_debug/

# custom
.idea/
@@ -121,6 +121,8 @@ def model_train(model, train_loader, optimizer, scheduler, loss_func, training_s
    train_loss /= len(train_loader)

    calc_metrics.compute()
    logger.info("")
    logger.info("--------------------------------------")
    logger.info(training_state)
    logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
    calc_metrics.reset()
@@ -162,6 +164,34 @@ def model_valid(model, valid_loader, wdir, loss_func):
    calc_metrics.reset()


def model_test(model, test_loader, loss_func):
    model.eval()
    test_loss = 0.0
    for resp, stft, labels in test_loader:
        resp = resp.float().cuda() if gpu else resp.float()
        stft = stft.float().cuda() if gpu else stft.float()
        labels = labels.cuda() if gpu else labels
        with torch.no_grad():
            # segments = F.normalize(segments)
            # segments = segments - torch.mean(segments, dim=1).view(-1, 1)
            # segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
            # segments = segments.view(len(segments), 1, -1)

            out = model(resp, stft)
            out = F.softmax(out, dim=1)
            loss = loss_func(out, labels)

        test_loss += loss.item()
        labels = torch.unsqueeze(labels, dim=1)
        out = torch.unsqueeze(out[:, 1], dim=1)
        calc_metrics.update(out.cpu(), labels.cpu())

    test_loss /= len(test_loader)
    calc_metrics.compute()
    logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
    calc_metrics.reset()


if __name__ == '__main__':

    try:
@@ -191,7 +221,7 @@ if __name__ == '__main__':
        model_net.cuda()

    k_folds = 5
    kfold = KFold(n_splits=k_folds, shuffle=True)
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    logger.info('--------------------------------')
    for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
@@ -213,9 +243,11 @@ if __name__ == '__main__':

        train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
        valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
        test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)

        train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
        test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)

        # Re-initialize the model
        del model_net
@@ -251,5 +283,6 @@ if __name__ == '__main__':
            model_train(model_net, train_loader, optimizer, scheduler, loss_function,
                        f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
            model_valid(model_net, valid_loader, wdir, loss_function)
            model_test(model_net, test_loader, loss_function)
            if wandb is not None:
                calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])
@@ -446,9 +446,9 @@ def segment_to_event(df_segment, dataset_type):
    # shap_values = explainer.shap_values()

if __name__ == '__main__':
    all_output_path = list(exam_path.rglob("KFold_0"))
    all_output_path = list(exam_path.rglob("KFold_*"))
    for exam_index, test_exam_path in enumerate(all_output_path):
        # test_exam_path = exam_path / test_exam_path
        set_environment(exam_index)
        # test_and_analysis_and_visual(dataset_type="test")
        test_and_analysis_and_visual(dataset_type="test")
        test_and_analysis_and_visual(dataset_type="all_test")
561 exam/004/generate_label_14.py Normal file
@@ -0,0 +1,561 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# 14.0
# Manually balance the sample counts

# 13.0
# Restrict the selection to part of the recordings for an initial test

# 12.0
# Cap segments that contain unusable events; unusable segments are treated as background and the event is not recorded

# 10.0
# Use the signals with poor-quality sections removed

# 9.0
# Add the latest quality labels (not used yet)

# 8.0
# Generate labels for all events except hypopnea

# A 2 s step was tried; windows shorter than 2 s were discarded, causing more false detections, though including them could be considered
# Use 30 s windows moved 1 s at a time; label every apnea as 1, hypopnea as 0, normal as 0

# Preprocessing: a 50 Hz notch filter to remove mains interference plus a 20 Hz low-pass filter (the 20 Hz cut-off depends on the spectral range of the BCG signal)

# First remove extreme values,
# i.e. values above the upper baseline or below the lower baseline
# type1: average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000

# Too many such segments shift the mean
# TODO
# Add body-movement labels and compute the mean excluding movement

# Finally downsample to 100 Hz

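# Added illustration (not in the original): with window_second = 30 and step_second = 1,
# a usable span of [0 s, 60 s) yields start points 0, 1, ..., 30, i.e. 31 overlapping
# 30-second segments [SP, SP + 30).
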
import time
|
||||
import logging
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import yaml
|
||||
from pathos import multiprocessing
|
||||
from tqdm import tqdm
|
||||
|
||||
# 数据集 和 标签 位置
|
||||
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
|
||||
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
|
||||
|
||||
# BCG 记录开始时间
|
||||
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
|
||||
dtype=object)
|
||||
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
|
||||
|
||||
# 读取每个数据集路径
|
||||
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
|
||||
all_numpy_dataset.sort()
|
||||
|
||||
# 划分后的数据集保存路径
|
||||
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
|
||||
dataset_save_path = Path(r"./dataset/")
|
||||
dataset_save_path.mkdir(exist_ok=True)
|
||||
|
||||
# 设置日志
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.NOTSET)
|
||||
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
|
||||
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
|
||||
fh.setLevel(logging.NOTSET)
|
||||
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
|
||||
fh.setFormatter(logging.Formatter("%(message)s"))
|
||||
logger.addHandler(fh)
|
||||
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(logging.NOTSET)
|
||||
ch.setFormatter(logging.Formatter("%(message)s"))
|
||||
logger.addHandler(ch)
|
||||
|
||||
# all_label = []
|
||||
# 输出统计数据标题栏
|
||||
# logger.info("sampNo".center(8) +
|
||||
# "hpy_num".center(8) + "hpy_time".center(10) +
|
||||
# "csa_num".center(8) + "csa_time".center(10) +
|
||||
# "osa_num".center(8) + "osa_time".center(10) +
|
||||
# "msa_num".center(8) + "msa_time".center(10)
|
||||
# )
|
||||
|
||||
logger.info("sampNo".center(8) + ',' +
|
||||
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
|
||||
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
|
||||
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
|
||||
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
|
||||
)
|
||||
|
||||
base_random_seed = 42
|
||||
|
||||
window_second = 30
|
||||
step_second = 1
|
||||
|
||||
valid_ratio = 0.1
|
||||
test_ratio = 0.1
|
||||
|
||||
normal_event_quality_label = 0
|
||||
# valid_ratio = 5000
|
||||
# test_ratio = 10000
|
||||
|
||||
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
        valid_ratio > 1 and test_ratio > 1), "validation and test set sizes must both be ratios or both be counts"
|
||||
|
||||
# dataset sampNo for test
|
||||
with open("./settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader)
|
||||
select_dataset = hyp["select_sampno"]
|
||||
|
||||
# 需要置成0的片段,前面不一定补零,还有可能上万
|
||||
disable_segment = {
|
||||
'221': [[0, 10000]],
|
||||
'670': [[0, 20000]],
|
||||
'683': [[0, 20000]],
|
||||
'704': [[0, 26000]],
|
||||
'726': [[0, 20000]],
|
||||
'736': [[0, 47000]],
|
||||
'933': [[0, 773560]],
|
||||
'935': [[0, 26600]],
|
||||
'952': [[0, 17000]],
|
||||
'955': [[0, 78000]],
|
||||
'961': [[0, 107000]],
|
||||
'962': [[0, 15100]],
|
||||
'966': [[0, 13120]],
|
||||
'967': [[0, 44000]],
|
||||
'1006': [[0, 60000]],
|
||||
'1009': [[0, 1000]],
|
||||
'1010': [[0, 49000]],
|
||||
'1296': [[0, 27000]],
|
||||
'1300': [[0, 33800]],
|
||||
'1301': [[0, 14000]],
|
||||
'1302': [[0, 5600]],
|
||||
'1374': [[0, 1000]],
|
||||
'1478': [[0, 998000]],
|
||||
|
||||
}
|
||||
|
||||
|
||||
# 生成数据集主函数
|
||||
def generate_label(No, dataset_path):
|
||||
"""
|
||||
:param dataset_path: 数据集路径
|
||||
:return:
|
||||
"""
|
||||
|
||||
# 获取数据编号
|
||||
sampNo = dataset_path.stem.split("samp")[0]
|
||||
# 标签路径
|
||||
label_path = bcg_label_path / f"export{sampNo}_all.csv"
|
||||
|
||||
if not label_path.exists():
|
||||
raise FileNotFoundError(f"{label_path} not exist")
|
||||
|
||||
if not dataset_path.exists():
|
||||
raise Exception(f"{dataset_path} not exists")
|
||||
|
||||
# 加载数据集
|
||||
select_numpy = np.load(dataset_path)
|
||||
select_numpy_len = len(select_numpy)
|
||||
|
||||
# 开头不合理片段置零
|
||||
if sampNo in disable_segment.keys():
|
||||
for sp, ep in disable_segment[sampNo]:
|
||||
select_numpy[sp:ep] = 0
|
||||
|
||||
# 剔除质量差信号
|
||||
if sampNo == "670":
|
||||
select_numpy = select_numpy[:17195 * 100]
|
||||
|
||||
# 获取前面补了多少0
|
||||
not_zero_point = 0
|
||||
for num in select_numpy:
|
||||
if num > 10:
|
||||
break
|
||||
not_zero_point += 1
|
||||
not_zero_point //= 100
|
||||
|
||||
# 读取标签
|
||||
label_csv = pd.read_csv(label_path, encoding='gbk')
|
||||
label_csv["new_start"] = label_csv["new_start"].astype("int")
|
||||
label_csv["new_end"] = label_csv["new_end"].astype("int")
|
||||
label_csv["Duration"] = label_csv["Duration"].astype("int")
|
||||
label_csv["new_label"] = label_csv["new_label"].fillna("2")
|
||||
label_csv["new_label"] = label_csv["new_label"].astype("int")
|
||||
# 剔除质量不好的样本
|
||||
# drop_csv = label_csv[
|
||||
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
|
||||
# label_csv = label_csv.drop(drop_csv.index)
|
||||
|
||||
# 事件片段与背景片段, 每个背景长度均为设定窗长
|
||||
segment_labels = []
|
||||
negative_labels = []
|
||||
hpy_num = csa_num = osa_num = msa_num = 0
|
||||
hpy_time = csa_time = osa_time = msa_time = 0
|
||||
|
||||
# 遍历全部事件并统计
|
||||
for i in range(len(label_csv)):
|
||||
# 进行LabelEncoder
|
||||
label = label_csv.iloc[i, :]
|
||||
# 如果事件在补零片段,则不添加到事件列表
|
||||
if label["new_end"] < not_zero_point:
|
||||
continue
|
||||
|
||||
if sampNo == "670" and label["new_start"] > 17195:
|
||||
continue
|
||||
|
||||
if label["new_end"] - label["new_start"] < 10:
|
||||
continue
|
||||
|
||||
# 将事件添加到事件列表
|
||||
if label["Event type"] == "Hypopnea":
|
||||
label_type = 1
|
||||
hpy_num += 1
|
||||
hpy_time += label["new_end"] - label["new_start"]
|
||||
|
||||
# 将低通气添加到背景 好像不用专门加入到负样本事件中?
|
||||
# negative_labels.append(
|
||||
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
|
||||
continue
|
||||
elif label["Event type"] == "Central apnea":
|
||||
label_type = 2
|
||||
csa_num += 1
|
||||
csa_time += label["new_end"] - label["new_start"]
|
||||
elif label["Event type"] == "Obstructive apnea":
|
||||
label_type = 3
|
||||
osa_num += 1
|
||||
osa_time += label["new_end"] - label["new_start"]
|
||||
# MSA 认为是OSA
|
||||
elif label["Event type"] == "Mixed apnea":
|
||||
label_type = 3
|
||||
msa_num += 1
|
||||
msa_time += label["new_end"] - label["new_start"]
|
||||
else:
|
||||
continue
|
||||
# label_type = 0
|
||||
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
|
||||
print(sampNo, label)
|
||||
|
||||
# 格式为 样本编号 第几个事件 标签 开始事件 结束事件
|
||||
segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
|
||||
|
||||
# logger.info(sampNo.center(8) +
|
||||
# str(hpy_num).center(8) + str(hpy_time).center(10) +
|
||||
# str(csa_num).center(8) + str(csa_time).center(10) +
|
||||
# str(osa_num).center(8) + str(osa_time).center(10) +
|
||||
# str(msa_num).center(8) + str(msa_time).center(10))
|
||||
|
||||
# 设置随机树种子
|
||||
random_seed = base_random_seed + int(sampNo)
|
||||
|
||||
# 对于无事件的样本,直接将所有的片段
|
||||
if len(segment_labels) == 0:
|
||||
# 剔除补零片段(把开始点移动到补零结束)
|
||||
normal_SP = not_zero_point
|
||||
# 开头至少满足一个窗长
|
||||
if normal_SP < window_second:
|
||||
normal_SP = window_second
|
||||
|
||||
# 结束点为样本总长 除以 采样率
|
||||
normal_EP = select_numpy_len // 100
|
||||
label_type = 0
|
||||
# 正常时间编号为秒数 除以 30,即epoch
|
||||
negative_labels += [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1,
|
||||
SP1 + window_second] for SP1 in
|
||||
range(normal_SP - window_second + step_second, normal_EP - window_second + step_second,
|
||||
window_second)]
|
||||
|
||||
# 对于有事件的样本
|
||||
# 遍历事件,获取事件之间的背景片段
|
||||
for index in range(len(segment_labels) + 1):
|
||||
# 前一个事件的结尾 与 下一事件开头即为背景
|
||||
# 对于开头的无事件片段,则设定开始事件为0
|
||||
if index == 0:
|
||||
normal_SP = 0
|
||||
# 非开头片段,开始点为上一个事件的结尾
|
||||
else:
|
||||
# 加一秒 作为 缓冲
|
||||
normal_SP = segment_labels[index - 1][-1] + 1
|
||||
|
||||
# 最后一个事件则取到样本片段结尾
|
||||
if index == len(segment_labels):
|
||||
normal_EP = select_numpy_len // 100 - window_second
|
||||
# 否则结束事件取 本事件开头
|
||||
else:
|
||||
# 减一秒 作为 缓冲
|
||||
normal_EP = segment_labels[index][-2] - 1
|
||||
|
||||
# 剔除包含开头补零的片段
|
||||
if normal_EP < not_zero_point:
|
||||
continue
|
||||
|
||||
# 剔除开头不足30s的正常片段
|
||||
if normal_SP < window_second:
|
||||
continue
|
||||
label_type = 0
|
||||
|
||||
# 将背景事件按照滑窗距离逐个加入到背景事件中
|
||||
temp_1 = [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1, SP1 + window_second]
|
||||
for SP1 in range(normal_SP - window_second, normal_EP - window_second, window_second)]
|
||||
|
||||
negative_labels += temp_1
|
||||
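# Added note: background windows here are generated every window_second (non-overlapping 30 s
# windows), while event windows elsewhere in this script step by step_second (1 s), so positive
# segments are sampled far more densely than background ones.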
|
||||
train_label, valid_label, test_label = [], [], []
|
||||
# assert (valid_ratio + test_ratio) < 1 <= len(segment_labels), f"{sampNo}训练集与测试集数量应小于总数据集数量{len(segment_labels)}"
|
||||
|
||||
# 对于测试数据全部直接保存
|
||||
if int(sampNo) in select_dataset:
|
||||
event_label = np.zeros(select_numpy_len // 100)
|
||||
quality_label = np.zeros(select_numpy_len // 100)
|
||||
# 用于存储事件标签
|
||||
for PN, segmentNo, label_type, new_label, SP, EP in segment_labels:
|
||||
event_label[SP:EP] = label_type
|
||||
|
||||
test_label = []
|
||||
# 剔除补零片段
|
||||
normal_SP = not_zero_point
|
||||
if normal_SP < window_second:
|
||||
normal_SP = window_second
|
||||
normal_EP = select_numpy_len // 100
|
||||
|
||||
# 分成指定窗长的滑窗片段
|
||||
test_label += [
|
||||
[sampNo, SP1 // 30, int(event_label[SP1 + window_second - step_second]),
|
||||
int(quality_label[SP1 + window_second - step_second]),
|
||||
SP1, SP1 + window_second] for SP1 in range(normal_SP - window_second + step_second,
|
||||
normal_EP - window_second + step_second, step_second)]
|
||||
|
||||
logger.info(sampNo.center(8) + ',' +
|
||||
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
|
||||
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
|
||||
str(len(test_label)).center(10) + ',' +
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
|
||||
',' + str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
|
||||
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
|
||||
)
|
||||
|
||||
df2.loc[No] = [sampNo,
|
||||
str(0), str(0), str(0),
|
||||
str(0), str(0), str(0),
|
||||
str(len(test_label)),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
|
||||
str(0), str(0), str(len(segment_labels))]
|
||||
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
|
||||
# np.array(test_label))
|
||||
df1 = pd.DataFrame(data=test_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
|
||||
index=False)
|
||||
|
||||
train_label, valid_label, test_label = [], [], []
|
||||
# 对于训练与验证集样本
|
||||
if True:
|
||||
# 打乱片段顺序
|
||||
np.random.seed(random_seed)
|
||||
np.random.shuffle(segment_labels)
|
||||
np.random.shuffle(negative_labels)
|
||||
|
||||
# 获取训练集、验证集、测试集分到事件个数
|
||||
if 0 < valid_ratio < 1:
|
||||
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
|
||||
valid_segment_num = int(len(segment_labels) * (1 - test_ratio))
|
||||
else:
|
||||
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
|
||||
valid_segment_num = valid_ratio
|
||||
|
||||
# 分别将各事件切分为30s, 步进1秒的片段
|
||||
for index in range(train_segment_num):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
|
||||
train_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(SP - window_second, EP - window_second + step_second, step_second)]
|
||||
|
||||
for index in range(train_segment_num, valid_segment_num):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
|
||||
valid_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(SP - window_second, EP - window_second + step_second, step_second)]
|
||||
|
||||
for index in range(valid_segment_num, len(segment_labels)):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
|
||||
test_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
               range(SP - window_second, EP - window_second + step_second, step_second)]
|
||||
|
||||
# 计算片段和事件个数
|
||||
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
|
||||
train_eve, valid_eve, test_eve = train_segment_num, (valid_segment_num - train_segment_num), (
|
||||
len(segment_labels) - valid_segment_num)
|
||||
|
||||
# 数据集补偿
|
||||
# if train_num < 300:
|
||||
# train_num = 300 - train_num
|
||||
#
|
||||
# if valid_num < 300:
|
||||
# valid_num = 300 - valid_num
|
||||
#
|
||||
# if test_num < 300:
|
||||
# test_num = 300 - test_num
|
||||
|
||||
# 获取训练集、验证集、测试集分到背景个数
|
||||
# if 0 < valid_ratio < 1:
|
||||
# train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
|
||||
# valid_eve2 = int(len(negative_labels) * valid_ratio)
|
||||
# else:
|
||||
# train_eve2 = len(negative_labels) - valid_ratio - test_ratio
|
||||
# valid_eve2 = valid_ratio
|
||||
#
|
||||
# test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
|
||||
# 直接补充到足够个数的背景事件
|
||||
train_eve2 = max(train_eve, 300)
|
||||
valid_eve2 = max(valid_eve, 40)
|
||||
test_eve2 = max(test_eve, 40)
|
||||
|
||||
# 强制背景数量
|
||||
# train_eve2 = train_eve
|
||||
# valid_eve2 = valid_eve
|
||||
# test_eve2 = test_eve
|
||||
|
||||
# 添加背景事件数量
|
||||
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[:train_eve2]:
|
||||
label_type = int(label_type)
|
||||
train_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
|
||||
in
|
||||
range(normal_SP, normal_EP, step_second)]
|
||||
|
||||
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
|
||||
train_eve2: train_eve2 + valid_eve2]:
|
||||
label_type = int(label_type)
|
||||
valid_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
|
||||
in
|
||||
range(normal_SP, normal_EP, step_second)]
|
||||
|
||||
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
|
||||
train_eve2 + valid_eve2:train_eve2 + valid_eve2 + test_eve2]:
|
||||
label_type = int(label_type)
|
||||
test_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(normal_SP, normal_EP, step_second)]
|
||||
|
||||
logger.info(sampNo.center(8) + ',' +
|
||||
str(len(train_label)).center(10) + ',' +
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(len(valid_label)).center(10) + ',' +
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(len(test_label)).center(10) + ',' +
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
|
||||
)
|
||||
|
||||
df2.loc[No] = [sampNo.center(8),
|
||||
str(len(train_label)),
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
|
||||
str(len(valid_label)),
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
|
||||
str(len(test_label)),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
|
||||
str(train_eve), str(valid_eve), str(test_eve).center(10)]
|
||||
|
||||
def label_check(label_list):
|
||||
temp_list = []
|
||||
for sampNo, index, label_type, new_label, SP, EP in label_list:
|
||||
if EP - SP < window_second:
|
||||
print(sampNo, index, label_type, SP, EP)
|
||||
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
|
||||
|
||||
if SP < 0:
|
||||
print(sampNo, index, label_type, SP, EP)
|
||||
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
|
||||
|
||||
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
|
||||
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
|
||||
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
|
||||
|
||||
for j in temp_list:
|
||||
label_list.remove(j)
|
||||
|
||||
label_check(train_label)
|
||||
label_check(valid_label)
|
||||
label_check(test_label)
|
||||
for sampNo, index, label_type, new_label, SP, EP in train_label:
|
||||
if EP - SP < window_second:
|
||||
print(sampNo, index, label_type, new_label, SP, EP)
|
||||
|
||||
if SP < 0:
|
||||
print(sampNo, index, label_type, new_label, SP, EP)
|
||||
|
||||
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
|
||||
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
|
||||
|
||||
df1 = pd.DataFrame(data=train_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
|
||||
index=False)
|
||||
|
||||
df1 = pd.DataFrame(data=valid_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
|
||||
index=False)
|
||||
|
||||
df1 = pd.DataFrame(data=test_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
|
||||
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
|
||||
# np.array(train_label))
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
|
||||
# np.array(valid_label))
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
|
||||
# np.array(test_label))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pool = multiprocessing.Pool(processes=44)
|
||||
# pool.map(generate_label, list(all_numpy_dataset))
|
||||
# pool.close()
|
||||
# pool.join()
|
||||
|
||||
df2 = pd.DataFrame(data=None,
|
||||
columns=["sampNo",
|
||||
"train_num", "train_P", "train_N",
|
||||
"valid_num", "valid_P", "valid_N",
|
||||
"test_num", "test_P", "test_N",
|
||||
"train_eve", "valid_eve", "test_eve"])
|
||||
|
||||
temp = []
|
||||
for one_dataset in all_numpy_dataset:
|
||||
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
|
||||
temp.append(one_dataset)
|
||||
# for one_dataset in temp:
|
||||
# all_numpy_dataset.remove(one_dataset)
|
||||
|
||||
for No, one_dataset in enumerate(temp):
|
||||
generate_label(No, one_dataset)
|
||||
|
||||
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
|
||||
# generate_label(all_numpy_dataset[0])
|
178 exam/004/load_dataset.py Normal file
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:load_dataset.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2021/12/03
|
||||
"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import torch.utils.data
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
from utils.Preprocessing import BCG_Operation
|
||||
|
||||
preprocessing = BCG_Operation()
|
||||
preprocessing.sample_rate = 100
|
||||
"""
|
||||
1. 读取方法
|
||||
# 无论是否提前切分,均提前转成npy格式
|
||||
# 1.1 提前预处理,切分好后生成npy,直接载入切分好的片段 内存占用多 读取简单
|
||||
使用此方法: 1.2 提前预处理,载入整夜数据,切分好后生成csv或xls,根据片段读取 内存占用少 读取较为复杂
|
||||
"""
|
||||
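# Added note: each row of the label csv written by generate_label_14.py has the layout
# [sampNo, index, label_type, new_label, SP, EP], with SP/EP in seconds; __getitem__ below
# uses SP/EP to slice the cached whole-night signal.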
|
||||
datasets = {}
|
||||
|
||||
|
||||
# Cache recordings so they are not read repeatedly
|
||||
def read_dataset(data_path, augment=None):
|
||||
data_path = Path(data_path)
|
||||
try:
|
||||
f = []
|
||||
if data_path.is_dir():
|
||||
dataset_list = list(data_path.rglob("*.npy"))
|
||||
dataset_list.sort()
|
||||
f += dataset_list
|
||||
elif data_path.is_file():
|
||||
raise Exception(f'dataset path should be a dir')
|
||||
else:
|
||||
raise Exception(f'{data_path} does not exist')
|
||||
except Exception as e:
|
||||
raise Exception(f'Error loading data from {data_path}: {e} \n')
|
||||
|
||||
print("loading dataset")
|
||||
for i in tqdm(f):
|
||||
select_dataset = np.load(i)
|
||||
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
|
||||
if augment is not None:
|
||||
select_dataset = augment(select_dataset)
|
||||
datasets[i.name.split("samp")[0]] = select_dataset
|
||||
|
||||
|
||||
# Read the data using approach 1.2 above
|
||||
class ApneaDataset(Dataset):
|
||||
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
|
||||
self.data_path = data_path
|
||||
self.label_path = label_path
|
||||
self.segment_augment = segment_augment
|
||||
self.labels = None
|
||||
self.dataset_type = dataset_type
|
||||
self.select_sampNo = select_sampno
|
||||
|
||||
# self._getAllData()
|
||||
self._getAllLabels()
|
||||
|
||||
def __getitem__(self, index):
|
||||
# PN: patient number
|
||||
# SP/EP start point, end point
|
||||
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
|
||||
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
|
||||
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
|
||||
|
||||
if isinstance(datasets, dict):
|
||||
dataset = datasets[str(PN)]
|
||||
segment = self.segment_augment(dataset, SP, EP)
|
||||
return (*segment, int(float(label_type) > 1))
|
||||
else:
|
||||
raise Exception(f'dataset read failure!')
|
||||
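# Added note: label_type follows generate_label_14.py (0 = background, 1 = hypopnea,
# 2 = CSA, 3 = OSA/MSA), so int(float(label_type) > 1) collapses it to a binary
# apnea / non-apnea target.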
|
||||
def count_SA(self):
|
||||
return sum(self.labels[:, 3] > 1)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.labels)
|
||||
|
||||
def _getAllLabels(self):
|
||||
label_path = Path(self.label_path)
|
||||
if not label_path.exists():
|
||||
raise Exception(f'{self.label_path} does not exist')
|
||||
|
||||
try:
|
||||
f = []
|
||||
if label_path.is_dir():
|
||||
if self.dataset_type == "train":
|
||||
label_list = list(label_path.rglob("*_train_label.csv"))
|
||||
elif self.dataset_type == "valid":
|
||||
label_list = list(label_path.rglob("*_valid_label.csv"))
|
||||
elif self.dataset_type == "test":
|
||||
label_list = list(label_path.glob("*_sa_test_label.csv"))
|
||||
# label_list = list(label_path.rglob("*_test_label.npy"))
|
||||
elif self.dataset_type == "all_test":
|
||||
label_list = list(label_path.rglob("*_sa_all_label.csv"))
|
||||
else:
|
||||
raise ValueError("self.dataset type error")
|
||||
# label_list = list(label_path.rglob("*_label.npy"))
|
||||
label_list.sort()
|
||||
f += label_list
|
||||
elif label_path.is_file():
|
||||
raise Exception(f'dataset path should be a dir')
|
||||
else:
|
||||
raise Exception(f'{self.label_path} does not exist')
|
||||
except Exception as e:
|
||||
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
|
||||
print("loading labels")
|
||||
for i in tqdm(f):
|
||||
if int(i.name.split("_")[0]) not in self.select_sampNo:
|
||||
continue
|
||||
|
||||
if self.labels is None:
|
||||
self.labels = pd.read_csv(i).to_numpy(dtype=int)
|
||||
else:
|
||||
labels = pd.read_csv(i).to_numpy(dtype=int)
|
||||
if len(labels) > 0:
|
||||
self.labels = np.concatenate((self.labels, labels))
|
||||
# self.labels = self.labels[:10000]
|
||||
print(f"{self.dataset_type} length is {len(self.labels)}")
|
||||
|
||||
|
||||
class TestApneaDataset(ApneaDataset):
|
||||
def __init__(self, data_path, label_path, dataset_type, select_sampno, segment_augment=None):
|
||||
super(TestApneaDataset, self).__init__(
|
||||
data_path=data_path,
|
||||
label_path=label_path,
|
||||
dataset_type=dataset_type,
|
||||
select_sampno=select_sampno,
|
||||
segment_augment=segment_augment
|
||||
)
|
||||
|
||||
def __getitem__(self, index):
|
||||
PN, segmentNo, label_type, SP, EP = self.labels[index]
|
||||
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
|
||||
|
||||
if isinstance(datasets, dict):
|
||||
segment = datasets[str(PN)][int(SP) * 100:int(EP) * 100].copy()
|
||||
if self.segment_augment is not None:
|
||||
segment = self.segment_augment(segment)
|
||||
return segment, int(float(label_type) > 1), PN, segmentNo, SP, EP
|
||||
else:
|
||||
raise Exception(f'dataset read failure!')
|
||||
|
||||
|
||||
class TestApneaDataset2(ApneaDataset):
|
||||
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
|
||||
super(TestApneaDataset2, self).__init__(
|
||||
data_path=data_path,
|
||||
label_path=label_path,
|
||||
dataset_type=dataset_type,
|
||||
segment_augment=segment_augment,
|
||||
select_sampno=select_sampno
|
||||
)
|
||||
|
||||
def __getitem__(self, index):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
|
||||
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
|
||||
|
||||
if isinstance(datasets, dict):
|
||||
dataset = datasets[str(PN)]
|
||||
segment = self.segment_augment(dataset, SP, EP)
|
||||
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
|
||||
else:
|
||||
raise Exception(f'dataset read failure!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
288 exam/004/main.py Normal file
@@ -0,0 +1,288 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@time:2021/10/15
|
||||
"""
|
||||
import os
|
||||
|
||||
import yaml
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import time
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
import torch.cuda
|
||||
from tqdm import tqdm
|
||||
from torchinfo import summary
|
||||
from load_dataset import ApneaDataset, read_dataset
|
||||
|
||||
from torch import nn
|
||||
from utils.calc_metrics import CALC_METRICS
|
||||
from sklearn.model_selection import KFold
|
||||
from model.Hybrid_Net001 import HYBRIDNET001
|
||||
# from utils.LossFunction import Foca1lLoss
|
||||
from my_augment import my_augment, my_segment_augment
|
||||
# Load the configuration
|
||||
with open("./settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader)
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
|
||||
os.environ["WANDB_MODE"] = "dryrun"
|
||||
|
||||
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
|
||||
|
||||
# Read the path settings
|
||||
data_path = hyp["Path"]["dataset"]
|
||||
label_path = hyp["Path"]["label"]
|
||||
|
||||
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
|
||||
save_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.NOTSET)
|
||||
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
|
||||
fh.setLevel(logging.NOTSET)
|
||||
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
|
||||
logger.addHandler(fh)
|
||||
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(logging.NOTSET)
|
||||
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
|
||||
logger.addHandler(ch)
|
||||
logging.getLogger('matplotlib.font_manager').disabled = True
|
||||
logger.info("------------------------------------")
|
||||
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
|
||||
|
||||
# Back up the configuration
|
||||
with open(save_dir / 'settings.yaml', 'w') as f:
|
||||
yaml.dump(hyp, f, sort_keys=False)
|
||||
|
||||
# Hyper-parameters
|
||||
gpu = torch.cuda.is_available()
|
||||
epochs = hyp["epoch"]
|
||||
lr = hyp["lr"]
|
||||
nc = hyp["nc"]
|
||||
bs = hyp["batch_size"]
|
||||
worker = hyp["number_worker"]
|
||||
select_sampno = hyp["select_sampno"]
|
||||
|
||||
read_dataset(data_path, augment=my_augment)
|
||||
calc_metrics = CALC_METRICS(nc)
|
||||
|
||||
|
||||
# Start training
# Train for one epoch
|
||||
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
|
||||
model.train()
|
||||
train_loss = 0.0
|
||||
optimizer.zero_grad()
|
||||
|
||||
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
|
||||
pbar.set_description(training_state)
|
||||
for i, (resp, stft, labels) in pbar:
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
# Forcibly normalize the data
# segments = F.normalize(segments)
# print(segments.size())
# Subtract the mean
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
# A 1-D convolution runs over the last dimension, so the CNN input should be [batch_size, embedding size, sequence size]
# i.e. the input would be [batch_size, 1, 3000], where 3000 = 30 s * 100 Hz
# segments = segments.view(len(segments), 1, -1)
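# Added note (inferred from my_segment_augment and the summary() call below, not stated here):
# in this dual-branch setup the commented-out single-channel path is unused; the model receives
# resp with shape [batch_size, 120, 1] (30 s of respiration at 4 Hz) and stft with shape
# [batch_size, 1, 121, 26].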
|
||||
out = model(resp, stft)
|
||||
|
||||
loss = loss_func(out, labels)
|
||||
optimizer.zero_grad()
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
# For cosine annealing, the scheduler would be stepped with the epoch progress:
# scheduler.step(epoch + i / len(train_loader.dataset))
# For the adaptive scheduler (ReduceLROnPlateau), step with the loss
scheduler.step(loss)
|
||||
|
||||
loss_value = loss.item()
|
||||
train_loss += loss_value
|
||||
# cur_lr = optimizer.param_groups[-1]['lr']
|
||||
labels = torch.unsqueeze(labels, dim=1)
|
||||
out = F.softmax(out, dim=1)
|
||||
out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
# if i % 20 == 0:
|
||||
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
|
||||
|
||||
cur_lr = optimizer.param_groups[-1]['lr']
|
||||
train_loss /= len(train_loader)
|
||||
|
||||
calc_metrics.compute()
|
||||
logger.info("")
|
||||
logger.info("--------------------------------------")
|
||||
logger.info(training_state)
|
||||
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
|
||||
calc_metrics.reset()
|
||||
|
||||
|
||||
def model_valid(model, valid_loader, wdir, loss_func):
|
||||
model.eval()
|
||||
valid_loss = 0.0
|
||||
for resp, stft, labels in valid_loader:
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
with torch.no_grad():
|
||||
# segments = F.normalize(segments)
|
||||
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
|
||||
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
|
||||
# segments = segments.view(len(segments), 1, -1)
|
||||
|
||||
out = model(resp, stft)
|
||||
out = F.softmax(out, dim=1)
|
||||
loss = loss_func(out, labels)
|
||||
|
||||
valid_loss += loss.item()
|
||||
labels = torch.unsqueeze(labels, dim=1)
|
||||
out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
|
||||
valid_loss /= len(valid_loader)
|
||||
calc_metrics.compute()
|
||||
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
|
||||
global best_f1
|
||||
valid_f1 = calc_metrics.metrics[-1].compute()
|
||||
if valid_f1 > best_f1:
|
||||
best_f1 = valid_f1
|
||||
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
|
||||
torch.save(model.state_dict(), wdir / f"best.pt")
|
||||
if wandb is not None:
|
||||
wandb.run.summary["best_f1"] = valid_f1
|
||||
calc_metrics.reset()
|
||||
|
||||
|
||||
def model_test(model, test_loader, loss_func):
|
||||
model.eval()
|
||||
test_loss = 0.0
|
||||
for resp, stft, labels in test_loader:
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
with torch.no_grad():
|
||||
# segments = F.normalize(segments)
|
||||
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
|
||||
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
|
||||
# segments = segments.view(len(segments), 1, -1)
|
||||
|
||||
out = model(resp, stft)
|
||||
out = F.softmax(out, dim=1)
|
||||
loss = loss_func(out, labels)
|
||||
|
||||
test_loss += loss.item()
|
||||
labels = torch.unsqueeze(labels, dim=1)
|
||||
out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
|
||||
test_loss /= len(test_loader)
|
||||
calc_metrics.compute()
|
||||
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
|
||||
calc_metrics.reset()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
try:
|
||||
import wandb
|
||||
except ImportError:
|
||||
wandb = None
|
||||
prefix = 'wandb: '
|
||||
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
|
||||
|
||||
if wandb is not None and wandb.run is None:
|
||||
wandb_run = wandb.init(
|
||||
config=hyp,
|
||||
name=save_dir.stem,
|
||||
project=hyp["project"],
|
||||
notes=hyp["Note"],
|
||||
tags=hyp["tags"],
|
||||
entity=hyp["entity"],
|
||||
)
|
||||
exam_name = Path("./").absolute().name
|
||||
|
||||
model_net = eval(hyp["model_name"])()
|
||||
model_net.initialize_weights()
|
||||
summary(model_net, [(1, 120, 1), (1, 1, 121, 26)])
|
||||
|
||||
time.sleep(3)
|
||||
if gpu:
|
||||
model_net.cuda()
|
||||
|
||||
k_folds = 5
|
||||
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
|
||||
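# Added note: with the 10 recordings listed under select_sampno in settings.yaml and
# k_folds = 5, each fold holds out 2 recordings as the independent test set.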
|
||||
logger.info('--------------------------------')
|
||||
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
|
||||
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
|
||||
train_set = [select_sampno[i] for i in train_ids]
|
||||
test_set = [select_sampno[i] for i in test_ids]
|
||||
logger.info(f'Train_Set:{train_set}')
|
||||
logger.info(f'Independent_Test_Set:{test_set}')
|
||||
|
||||
sub_save_dir = save_dir / f"KFold_{fold}"
|
||||
sub_save_dir.mkdir(exist_ok=True, parents=True)
|
||||
wdir = sub_save_dir / "weights"
|
||||
wdir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
hyp["train_set"] = train_set
|
||||
hyp["test_set"] = test_set
|
||||
with open(sub_save_dir / 'settings.yaml', 'w') as f:
|
||||
yaml.dump(hyp, f, sort_keys=False)
|
||||
|
||||
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
|
||||
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
|
||||
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
|
||||
|
||||
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
|
||||
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
|
||||
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
|
||||
|
||||
# Re-initialize the model
|
||||
del model_net
|
||||
model_net = eval(hyp["model_name"])()
|
||||
model_net.initialize_weights()
|
||||
if gpu:
|
||||
model_net.cuda()
|
||||
|
||||
logger.info(f"Weight is {[1, (len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]}")
|
||||
# Loss function and optimizer
|
||||
loss_function = nn.CrossEntropyLoss(
|
||||
weight=torch.Tensor([1, (len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]).cuda())
|
||||
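# Added worked example (hypothetical numbers): if the training set held 9,000 background
# windows and 1,000 apnea windows, count_SA() would be 1,000 and the positive-class weight
# (10,000 - 1,000) / 1,000 = 9.0, i.e. weight = [1, 9.0].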
|
||||
# loss_func = nn.BCEWithLogitsLoss()
|
||||
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
|
||||
|
||||
# momentum
# nesterov: Nesterov momentum
# weight_decay: L2 regularization
|
||||
# optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
|
||||
|
||||
optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
|
||||
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
|
||||
|
||||
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
|
||||
patience=2836, min_lr=1e-8,
|
||||
verbose=True)
|
||||
|
||||
# 参数记录
|
||||
best_f1 = 0
|
||||
|
||||
for epoch in range(epochs):
|
||||
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
|
||||
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
|
||||
model_valid(model_net, valid_loader, wdir, loss_function)
|
||||
model_test(model_net, test_loader, loss_function)
|
||||
if wandb is not None:
|
||||
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])
|
97 exam/004/model/Hybrid_Net001.py Normal file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:andrew
|
||||
@file:Hybrid_Net001.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/09/30
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torchinfo import summary
|
||||
from torch import cat
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
|
||||
# Input duration in seconds
WHOLE_SEGMENT_SECOND = 30

# Respiration sampling rate (Hz)
RESPIRATORY_FRE = 4

# Size of the BCG time-frequency map
BCG_GRAPH_SIZE = (26, 121)
|
||||
|
||||
|
||||
class HYBRIDNET001(nn.Module):
|
||||
def __init__(self, num_classes=2, init_weights=True):
|
||||
super(HYBRIDNET001, self).__init__()
|
||||
|
||||
self.lstm = nn.LSTM(input_size=1,
|
||||
hidden_size=16,
|
||||
num_layers=1,
|
||||
bidirectional=True,
|
||||
batch_first=True)
|
||||
|
||||
self.right = nn.Sequential(
|
||||
nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3),
|
||||
stride=(1, 1), padding=(1, 1)),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
|
||||
nn.BatchNorm2d(16),
|
||||
|
||||
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3),
|
||||
stride=(1, 1), padding=(1, 1)),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
|
||||
nn.BatchNorm2d(32),
|
||||
|
||||
nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3),
|
||||
stride=(1, 1), padding=(1, 1)),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
|
||||
nn.BatchNorm2d(32)
|
||||
|
||||
|
||||
|
||||
)
|
||||
|
||||
self.classifier = nn.Sequential(
|
||||
# nn.Dropout(p=0.5),
|
||||
nn.Linear((120 * 32 + 32 * 16 * 4), 512),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(512, num_classes),
|
||||
)
|
||||
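# Added note on the flattened feature size (derived from the layers above):
#   LSTM branch: 120 time steps x (2 directions x 16 hidden) = 120 * 32 = 3840 features
#   CNN branch: the 1 x 121 x 26 spectrogram passes three stride-2 max-pools -> 16 x 4 map
#               with 32 channels = 32 * 16 * 4 = 2048 features
# hence nn.Linear(120 * 32 + 32 * 16 * 4, 512).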
|
||||
if init_weights:
|
||||
self.initialize_weights()
|
||||
|
||||
def initialize_weights(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming (He) initialization
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.Linear):
|
||||
nn.init.normal_(m.weight, 0, 0.01)  # initialize weights from a normal distribution
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x1, x2):
|
||||
x1, (_, _) = self.lstm(x1)
|
||||
x2 = self.right(x2)
|
||||
# print(x1.shape)
|
||||
# print(x2.shape)
|
||||
x1 = torch.flatten(x1, start_dim=1)
|
||||
x2 = torch.flatten(x2, start_dim=1)
|
||||
x = torch.cat((x1, x2), dim=1)
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
model = HYBRIDNET001(2).cuda()
|
||||
summary(model, [(32, 120, 1), (32, 1, 121, 26)])
|
51 exam/004/my_augment.py Normal file
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:my_augment.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/07/26
|
||||
"""
|
||||
from utils.Preprocessing import BCG_Operation
|
||||
import numpy as np
|
||||
from scipy.signal import stft
|
||||
|
||||
preprocessing = BCG_Operation()
|
||||
preprocessing.sample_rate = 100
|
||||
|
||||
|
||||
def my_augment(dataset):
|
||||
dataset = dataset - dataset.mean()
|
||||
dataset = preprocessing.Iirnotch(dataset)
|
||||
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
|
||||
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.7, order=6)
|
||||
dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
|
||||
dataset = {"low": dataset_low,
|
||||
"high": dataset_high}
|
||||
return dataset
|
||||
|
||||
|
||||
def get_stft(x, fs, n):
|
||||
print(len(x))
|
||||
f, t, amp = stft(x, fs, nperseg=n)
|
||||
z = np.abs(amp.copy())
|
||||
return f, t, z
|
||||
|
||||
|
||||
def my_segment_augment(dataset, SP, EP):
|
||||
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
|
||||
dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
|
||||
|
||||
dataset_low = dataset_low[::25]
|
||||
dataset_low = dataset_low.reshape(dataset_low.shape[0], 1)
|
||||
|
||||
_, _, dataset_high = stft(dataset_high, 100, nperseg=50)
# stft returns complex values; take the magnitude (as get_stft does) before casting,
# and use float32 explicitly since np.float has been removed from recent NumPy releases
dataset_high = np.abs(dataset_high).astype(np.float32).T
dataset_high = dataset_high.reshape(1, dataset_high.shape[0], dataset_high.shape[1])
|
||||
|
||||
return dataset_low, dataset_high
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
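A quick sanity check on the shapes my_segment_augment produces, run on synthetic data (the random signal below is a hypothetical stand-in; the real pipeline loads 100 Hz BCG recordings through read_dataset): a 30 s window yields a (120, 1) respiratory branch after the [::25] decimation and a (1, 121, 26) magnitude-STFT branch from nperseg=50 at fs=100.

import numpy as np
from my_augment import my_augment, my_segment_augment

raw = np.random.randn(60 * 100) * 100 + 1800   # 60 s of synthetic 100 Hz "BCG"
bands = my_augment(raw)                        # {"low": respiration band, "high": BCG band}
resp, stft_map = my_segment_augment(bands, 30, 60)
print(resp.shape)      # (120, 1)
print(stft_map.shape)  # (1, 121, 26)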
exam/004/settings.yaml (new file)
@@ -0,0 +1,42 @@
|
||||
# environment config
|
||||
GPU: "1"
|
||||
|
||||
|
||||
# dataset config
|
||||
Path:
|
||||
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
|
||||
label: ./dataset/
|
||||
save: ./output/
|
||||
|
||||
batch_size: 256
|
||||
number_worker: 0
|
||||
model_name: HYBRIDNET001
|
||||
select_sampno:
|
||||
- 282
|
||||
- 726
|
||||
- 229
|
||||
- 582
|
||||
- 286
|
||||
- 966
|
||||
- 1000
|
||||
- 1004
|
||||
- 1006
|
||||
- 1009
|
||||
|
||||
# train hyperparameters config
|
||||
epoch: 50
|
||||
lr: 0.00001
|
||||
nc: 1
|
||||
|
||||
# wandb config
|
||||
entity: "marques"
|
||||
project: "Sleep_Apnea_HYBRID00X"
|
||||
Note: "HYBRID001 NOBD STFT RESP DUAL"
|
||||
tags: ["ReduceLROnPlateau", "RESP LSTM", "STFT CNN"]
|
||||
|
||||
|
||||
# "CW":class_weight
|
||||
|
||||
|
||||
# "CosineAnnealingLR"
|
||||
# "ReduceLROnPlateau"
|
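For reference, the scripts in this folder read the file with a plain SafeLoader; a minimal sketch (same pattern as test_save_result.py):

import yaml

with open("./settings.yaml") as f:
    hyp = yaml.load(f, Loader=yaml.SafeLoader)

print(hyp["Path"]["dataset"])             # dataset root
print(hyp["batch_size"], hyp["lr"], hyp["epoch"])
print(hyp["select_sampno"])               # recording numbers selected for this experiment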
exam/004/test_save_result.py (new file)
@@ -0,0 +1,454 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:test_save_result.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/02/21
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
import torch.cuda
|
||||
import numpy as np
|
||||
import yaml
|
||||
from matplotlib import pyplot as plt
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from load_dataset import TestApneaDataset2, read_dataset
|
||||
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
|
||||
from torch import nn
|
||||
from utils.calc_metrics import CALC_METRICS
|
||||
from my_augment import my_augment, my_segment_augment
|
||||
from model.Hybrid_Net001 import HYBRIDNET001
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']  # use SimHei so Chinese labels render correctly
|
||||
exam_path = Path("./output/")
|
||||
|
||||
# confidence threshold on the positive-class probability
thresh = 0.5
# gap bridging: each positive window also marks the next (thresh_event_interval - 1) windows positive
thresh_event_interval = 2
# minimum run length (in windows) for a merged run to count as an event
thresh_event_length = 8

# label_type values above this count as apnea (hypopnea = 1 is treated as negative)
event_thresh = 1
|
||||
|
||||
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
|
||||
severity_label = {"all": "none"}
|
||||
severity_df = pd.read_excel(severity_path)
|
||||
for one_data in severity_df.index:
|
||||
one_data = severity_df.loc[one_data]
|
||||
severity_label[str(one_data["数据编号"])] = one_data["程度"]
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
gpu = torch.cuda.is_available()
|
||||
|
||||
num_classes = 1
|
||||
calc_metrics = CALC_METRICS(num_classes)
|
||||
|
||||
with open("./settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
|
||||
data_path = hyp["Path"]["dataset"]
|
||||
read_dataset(data_path, augment=my_augment)
|
||||
del hyp
|
||||
|
||||
# by default, take the latest output folder(s)
|
||||
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
|
||||
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
|
||||
train_set, test_set = None, None
|
||||
loss_func = nn.CrossEntropyLoss()
|
||||
|
||||
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
|
||||
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
|
||||
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
|
||||
|
||||
|
||||
def set_environment(i):
|
||||
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
|
||||
model, model_name, train_set, test_set
|
||||
|
||||
output_path = all_output_path[i]
|
||||
print(output_path)
|
||||
segments_results_save_path = (output_path / "segments_results")
|
||||
segments_results_save_path.mkdir(exist_ok=True)
|
||||
events_results_save_path = (output_path / "events_results")
|
||||
events_results_save_path.mkdir(exist_ok=True)
|
||||
|
||||
# load the saved config of this run
|
||||
with open(output_path / "settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
|
||||
data_path = hyp["Path"]["dataset"]
|
||||
label_path = hyp["Path"]["label"]
|
||||
train_set = hyp["train_set"]
|
||||
test_set = hyp["test_set"]
|
||||
|
||||
model_path = output_path / "weights" / "best.pt"
|
||||
model = eval(hyp["model_name"])()
|
||||
model_name = hyp["model_name"]
|
||||
model.load_state_dict(torch.load(model_path))
|
||||
model.cuda()
|
||||
model.eval()
|
||||
|
||||
|
||||
def test_and_analysis_and_visual(dataset_type):
|
||||
if dataset_type == "test":
|
||||
sampNo = train_set
|
||||
elif dataset_type == "all_test":
|
||||
sampNo = test_set
|
||||
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
|
||||
segment_augment=my_segment_augment)
|
||||
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
|
||||
|
||||
test_loss = 0.0
|
||||
|
||||
df_segment = pd.DataFrame(columns=columns)
|
||||
|
||||
for one in tqdm(test_loader, total=len(test_loader)):
|
||||
resp, stft, labels = one[:3]
|
||||
other_info = one[3:]
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
with torch.no_grad():
|
||||
out = model(resp, stft)
|
||||
|
||||
loss = loss_func(out, labels)
|
||||
|
||||
test_loss += loss.item()
|
||||
|
||||
labels = torch.unsqueeze(labels, dim=1)
|
||||
out = F.softmax(out, dim=1)
|
||||
out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
# one[0] = list(one[0].cpu().numpy())
|
||||
# one[1] = list(one[1].cpu().numpy())
|
||||
# one = one[1:]
|
||||
# out = out.view(1, -1).cpu().numpy().tolist()
|
||||
# one += out
|
||||
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
|
||||
|
||||
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
|
||||
one2 = one2.transpose((1, 0))
|
||||
df = pd.DataFrame(data=one2, columns=columns)
|
||||
df_segment = df_segment.append(df, ignore_index=True)
|
||||
|
||||
test_loss /= len(test_loader)
|
||||
calc_metrics.compute()
|
||||
print(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
|
||||
calc_metrics.reset()
|
||||
|
||||
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
|
||||
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
|
||||
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
|
||||
|
||||
# segment-level analysis
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)

# draw confusion matrices
# one per recording
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
# draw bar charts

# event-level analysis
# for the inner test set every segment number already corresponds to one event,
# while for the whole-night independence test events have to be rebuilt separately
df_all_event = segment_to_event(df_segment, dataset_type)
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)

# drop poor-quality segments (apnea windows whose quality label new_label == 2)
df_bad_segment = df_segment[
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
|
||||
df_select_segment = df_segment.drop(df_bad_segment.index)
|
||||
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
|
||||
dataset_type)
|
||||
df_select_event = segment_to_event(df_select_segment, dataset_type)
|
||||
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
|
||||
is_event=True)
|
||||
|
||||
|
||||
def analysis_results(df_result, base_path, dataset_type, is_event=False):
|
||||
if df_result.empty:
|
||||
print(base_path, dataset_type, "is_empty")
|
||||
return None
|
||||
|
||||
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
|
||||
|
||||
all_sampNo = df_result["sampNo"].unique()
|
||||
df_metrics = pd.DataFrame(columns=columns2)
|
||||
df_metrics.loc[0] = 0
|
||||
df_metrics.loc[0]["sampNo"] = dataset_type
|
||||
|
||||
for index, sampNo in enumerate(all_sampNo):
|
||||
df = df_result[df_result["sampNo"] == sampNo]
|
||||
df.to_csv(
|
||||
base_path / dataset_type /
|
||||
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
|
||||
index=False)
|
||||
|
||||
df_metrics.loc[index + 1] = np.NAN
|
||||
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
|
||||
df_metrics.loc[index + 1]["support"] = df.shape[0]
|
||||
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
|
||||
|
||||
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
|
||||
# continue
|
||||
# else:
|
||||
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
|
||||
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
|
||||
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
|
||||
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
|
||||
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
|
||||
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
|
||||
df_metrics.loc[index + 1]["TP"] = \
|
||||
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
|
||||
df_metrics.loc[index + 1]["FP"] = \
|
||||
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
|
||||
df_metrics.loc[index + 1]["TN"] = \
|
||||
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
|
||||
df_metrics.loc[index + 1]["FN"] = \
|
||||
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
|
||||
|
||||
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
|
||||
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
|
||||
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
|
||||
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
|
||||
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
|
||||
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
|
||||
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
|
||||
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
|
||||
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
|
||||
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
|
||||
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
|
||||
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
|
||||
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
|
||||
col] != 0 else np.NAN
|
||||
|
||||
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
|
||||
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
|
||||
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
|
||||
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
|
||||
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
|
||||
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
|
||||
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
|
||||
"spec", "pre", "NPV", "F1score"]:
|
||||
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
|
||||
df_metrics.loc[index + 1][col]
|
||||
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
|
||||
|
||||
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
|
||||
# return None
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
|
||||
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
|
||||
|
||||
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
|
||||
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
|
||||
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
|
||||
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
|
||||
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
|
||||
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
|
||||
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
|
||||
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
|
||||
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
|
||||
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
|
||||
|
||||
# 在inner_test中根据 分严重程度绘制
|
||||
if dataset_type == "test":
|
||||
all_severity = ["正常", "轻度", "中度", "重度"]
|
||||
for index, severity in enumerate(all_severity):
|
||||
df_event = df_metrics[df_metrics["severity"] == severity]
|
||||
df_temp = pd.DataFrame(columns=columns2)
|
||||
df_temp.loc[0] = 0
|
||||
df_temp.loc[0]["sampNo"] = severity
|
||||
df_temp.loc[0]["severity"] = str(index + 1)
|
||||
|
||||
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
|
||||
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
|
||||
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
|
||||
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
|
||||
df_temp.loc[0]["T"] += df_event["T"].sum()
|
||||
df_temp.loc[0]["F"] += df_event["F"].sum()
|
||||
df_temp.loc[0]["TP"] += df_event["TP"].sum()
|
||||
df_temp.loc[0]["FP"] += df_event["FP"].sum()
|
||||
df_temp.loc[0]["TN"] += df_event["TN"].sum()
|
||||
df_temp.loc[0]["FN"] += df_event["FN"].sum()
|
||||
df_temp.loc[0]["support"] += df_event["support"].sum()
|
||||
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
|
||||
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
|
||||
|
||||
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
|
||||
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
|
||||
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
|
||||
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
|
||||
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
|
||||
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
|
||||
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
|
||||
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
|
||||
"spec", "pre", "NPV", "F1score"]:
|
||||
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
|
||||
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
|
||||
|
||||
df_metrics = df_metrics.append(df_temp, ignore_index=True)
|
||||
|
||||
df_backup = df_metrics
|
||||
df_metrics = df_metrics.astype("str")
|
||||
df_metrics = df_metrics.sort_values("severity")
|
||||
df_metrics.to_csv(base_path / dataset_type /
|
||||
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
|
||||
index=False, encoding="gbk")
|
||||
|
||||
return df_backup
|
||||
|
||||
|
||||
def confusionMatrix(df_analysis, base_path, dataset_type):
|
||||
if df_analysis is None:
|
||||
print(base_path, dataset_type, "is None")
|
||||
return
|
||||
|
||||
if df_analysis.empty:
|
||||
print(base_path, dataset_type, "is_empty")
|
||||
return
|
||||
classes = ["normal", "SA"]
|
||||
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
|
||||
for one_samp in df_analysis.index:
|
||||
one_samp = df_analysis.loc[one_samp]
|
||||
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
|
||||
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
|
||||
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
|
||||
|
||||
|
||||
def segment_to_event(df_segment, dataset_type):
|
||||
df_all_event = pd.DataFrame(columns=columns)
|
||||
all_sampNo = df_segment["sampNo"].unique()
|
||||
|
||||
if dataset_type == "test":
|
||||
for index, sampNo in enumerate(all_sampNo):
|
||||
df_event = pd.DataFrame(columns=columns)
|
||||
df = df_segment[df_segment["sampNo"] == sampNo].copy()
|
||||
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
|
||||
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
|
||||
all_segments_no = df["segmentNo"].unique()
|
||||
|
||||
for index_se, segment_No in enumerate(all_segments_no):
|
||||
df_temp = df[df["segmentNo"] == segment_No].copy()
|
||||
SP = df_temp.iloc[0]["EP"]
|
||||
EP = df_temp.iloc[-1]["EP"] + 1
|
||||
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
|
||||
df_temp.iloc[0]["new_label"], SP, EP, 0]
|
||||
|
||||
thresh_Pred = df_temp["thresh_Pred"].values
|
||||
thresh_Pred2 = thresh_Pred.copy()
|
||||
|
||||
# dilation: each positive window also marks the next (thresh_event_interval - 1) windows positive to bridge short gaps
|
||||
for index_pred, pred in enumerate(thresh_Pred):
|
||||
if pred == 0:
|
||||
continue
|
||||
|
||||
for interval in range(1, thresh_event_interval):
|
||||
if pred == 1 and index_pred + interval < thresh_Pred.size:
|
||||
thresh_Pred2[index_pred + interval] = 1
|
||||
else:
|
||||
continue
|
||||
|
||||
# run-length check: find runs of constant value and their lengths
|
||||
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
|
||||
index_ar = np.where(same_ar)[0]
|
||||
count_ar = np.diff(index_ar)
|
||||
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
|
||||
for i in value_ar:
|
||||
if i > thresh_event_length:
|
||||
df_event.iloc[index_se]["pred"] = 1
|
||||
|
||||
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
|
||||
# encoding="gbk")
|
||||
df_all_event = df_all_event.append(df_event, ignore_index=True)
|
||||
else:
|
||||
for index, sampNo in enumerate(all_sampNo):
|
||||
df_event = pd.DataFrame(columns=columns)
|
||||
df = df_segment[df_segment["sampNo"] == sampNo].copy()
|
||||
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
|
||||
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
|
||||
thresh_Pred = df["thresh_Pred"].values
|
||||
thresh_Pred2 = thresh_Pred.copy()
|
||||
# dilation: each positive window also marks the next (thresh_event_interval - 1) windows positive to bridge short gaps
|
||||
for index_pred, pred in enumerate(thresh_Pred):
|
||||
if pred == 0:
|
||||
continue
|
||||
|
||||
for interval in range(1, thresh_event_interval):
|
||||
if pred == 1 and index_pred + interval < thresh_Pred.size:
|
||||
thresh_Pred2[index_pred + interval] = 1
|
||||
else:
|
||||
continue
|
||||
|
||||
# run-length check: find runs of constant value and their lengths
|
||||
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
|
||||
index_ar = np.where(same_ar)[0]
|
||||
count_ar = np.diff(index_ar)
|
||||
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
|
||||
|
||||
for value_index, value in enumerate(value_ar):
|
||||
SP = index_ar[value_index]
|
||||
EP = index_ar[value_index] + count_ar[value_index]
|
||||
# TP, FP
|
||||
if value > thresh_event_length:
|
||||
|
||||
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
|
||||
label_type = df["label_type"][SP:EP].max()
|
||||
new_label = df["new_label"][SP:EP].max()
|
||||
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
|
||||
SP, EP, thresh_Pred2[SP]]], columns=columns),
|
||||
ignore_index=True)
|
||||
if value > 30:
|
||||
print([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
|
||||
# run shorter than thresh_event_length
|
||||
else:
|
||||
df["thresh_Pred"][SP:EP] = 0
|
||||
|
||||
# also record segments with no predicted event as negatives
|
||||
for segment_no in df["segmentNo"].unique():
|
||||
df_temp = df[df["segmentNo"] == segment_no]
|
||||
if df_temp["thresh_Pred"].sum() > 0:
|
||||
continue
|
||||
|
||||
df_event = df_event.append(pd.DataFrame(
|
||||
[[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
|
||||
(segment_no + 1) * 30, 0]], columns=columns),
|
||||
ignore_index=True)
|
||||
|
||||
df_all_event = df_all_event.append(df_event, ignore_index=True)
|
||||
|
||||
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
|
||||
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
|
||||
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
|
||||
return df_all_event
|
||||
|
||||
|
||||
# save results per sampNo and visualize them without overlap
# inner_test

# per sampNo, save the windows that disagree with the labels separately and visualize them without overlap
|
||||
|
||||
# import shap
|
||||
# explainer = shap.TreeExplainer()
|
||||
# shap_values = explainer.shap_values()
|
||||
|
||||
if __name__ == '__main__':
|
||||
all_output_path = list(exam_path.rglob("KFold_*"))
|
||||
for exam_index, test_exam_path in enumerate(all_output_path):
|
||||
# test_exam_path = exam_path / test_exam_path
|
||||
set_environment(exam_index)
|
||||
test_and_analysis_and_visual(dataset_type="test")
|
||||
test_and_analysis_and_visual(dataset_type="all_test")
|
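segment_to_event merges per-second window predictions into events with a small run-length trick: boundaries of constant runs come from a shifted comparison, run lengths from np.diff, and a positive run only becomes an event if it is longer than thresh_event_length. A standalone toy sketch of just that step (made-up predictions, same variable names):

import numpy as np

thresh_event_length = 8  # same meaning as in test_save_result.py
thresh_Pred2 = np.array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0])

same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
index_ar = np.where(same_ar)[0]                      # run boundaries
count_ar = np.diff(index_ar)                         # run lengths
value_ar = thresh_Pred2[index_ar[:-1]] * count_ar    # 0 for negative runs, length for positive runs

for run_no, value in enumerate(value_ar):
    SP, EP = index_ar[run_no], index_ar[run_no] + count_ar[run_no]
    if value > thresh_event_length:
        print(f"event kept: seconds {SP}..{EP}")        # the 9-window positive run
    elif value > 0:
        print(f"run too short, dropped: seconds {SP}..{EP}")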
exam/004/utils/Draw_ConfusionMatrix.py (new file)
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:Draw_ConfusionMatrix.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/08/10
|
||||
"""
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']  # use SimHei so Chinese labels render correctly
|
||||
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with the Chinese font
|
||||
|
||||
|
||||
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
|
||||
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
|
||||
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
|
||||
ax.figure.colorbar(im, ax=ax)
|
||||
ax.set(xticks=np.arange(cm.shape[1]),
|
||||
yticks=np.arange(cm.shape[0]),
|
||||
xticklabels=classes, yticklabels=classes,
|
||||
title=title,
|
||||
ylabel='True label',
|
||||
xlabel='Predicted label')
|
||||
ax.set_ylim(len(classes) - 0.5, -0.5)
|
||||
|
||||
# Rotate the tick labels and set their alignment.
|
||||
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
|
||||
normalize = False
|
||||
fmt = '.2f' if normalize else 'd'
|
||||
thresh = cm.max() * 0.8
|
||||
for i in range(cm.shape[0]):
|
||||
for j in range(cm.shape[1]):
|
||||
ax.text(j, i, format(cm[i, j], fmt),
|
||||
ha="center", va="center",
|
||||
color="white" if cm[i, j] > thresh else "black")
|
||||
fig_cm.tight_layout()
|
||||
fig_cm.savefig(save_path)
|
||||
plt.close()
|
||||
#
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
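A minimal usage sketch (hypothetical counts; the call mirrors confusionMatrix() in test_save_result.py): the matrix is laid out as [[TN, FP], [FN, TP]], i.e. rows are the true class and columns the predicted class.

import numpy as np
from utils.Draw_ConfusionMatrix import draw_confusionMatrix

cm = np.array([[950, 50],
               [30, 170]])  # [[TN, FP], [FN, TP]], hypothetical counts
draw_confusionMatrix(cm, classes=["normal", "SA"], title="282 (mild)",
                     save_path="282_confusionMatrix.jpg")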
exam/004/utils/Preprocessing.py (new file)
@@ -0,0 +1,181 @@
|
||||
# encoding:utf-8
|
||||
|
||||
"""
|
||||
@ date: 2020-09-16
|
||||
@ author: jingxian
|
||||
@ illustration: Pre-processing
|
||||
"""
|
||||
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import pywt
|
||||
from scipy import signal
|
||||
from scipy import fftpack
|
||||
|
||||
|
||||
def Dilate(x, N, g, M):
|
||||
returndata = np.array([])
|
||||
for num in range(N - M + 1):
|
||||
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
|
||||
return returndata
|
||||
|
||||
|
||||
def Eorde(x, N, g, M):
|
||||
returndata = np.array([])
|
||||
for num in range(N - M + 1):
|
||||
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
|
||||
return returndata
|
||||
|
||||
|
||||
def fin_turn(data, peak):
|
||||
if len(data) == 0 or len(peak) == 0: return peak
|
||||
return_peak = []
|
||||
for p in peak:
|
||||
minx, maxx = max(0, p - 100), min(len(data), p + 100)
|
||||
return_peak.append(minx + np.argmax(data[minx: maxx]))
|
||||
return return_peak
|
||||
|
||||
|
||||
class BCG_Operation():
|
||||
def __init__(self, sample_rate=1000):
|
||||
self.sample_rate = sample_rate
|
||||
|
||||
def down_sample(self, data=None, down_radio=10):
|
||||
if data is None:
|
||||
raise ValueError("data is None, please given an real value!")
|
||||
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
|
||||
self.sample_rate = self.sample_rate / down_radio
|
||||
return data
|
||||
|
||||
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
|
||||
"""
|
||||
Split the signal into windows
|
||||
:param len_win: length of window
|
||||
:return: signal windows
|
||||
"""
|
||||
if (len_win is None) or (data is None):
|
||||
raise ValueError("length of window or data is None, please given an real value!")
|
||||
else:
|
||||
length = len_win * self.sample_rate # number point of a window
|
||||
# step of split windows
|
||||
step = length * coverage
|
||||
start = 0
|
||||
Splitdata = []
|
||||
while (len(data) - start >= length):
|
||||
Splitdata.append(data[int(start):int(start + length)])
|
||||
start += step
|
||||
if calculate_to_end and (len(data) - start > 2000):
|
||||
remain = len(data) - start
|
||||
start = start - step
|
||||
step = int(remain / 2000)
|
||||
start = start + step * 2000
|
||||
Splitdata.append(data[int(start):int(start + length)])
|
||||
return np.array(Splitdata), step
|
||||
elif calculate_to_end:
|
||||
return np.array(Splitdata), 0
|
||||
else:
|
||||
return np.array(Splitdata)
|
||||
|
||||
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
|
||||
"""
|
||||
:param type: Type of Butter. filter, lowpass, bandpass, ...
|
||||
:param lowcut: Low cutoff frequency
|
||||
:param highcut: High cutoff frequency
|
||||
:param order: Order of filter
|
||||
:return: Signal after filtering
|
||||
"""
|
||||
if type == "lowpass": # 低通滤波处理
|
||||
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == "bandpass": # 带通滤波处理
|
||||
low = low_cut / (self.sample_rate * 0.5)
|
||||
high = high_cut / (self.sample_rate * 0.5)
|
||||
b, a = signal.butter(order, [low, high], btype='bandpass')
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == "highpass": # 高通滤波处理
|
||||
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
else:  # the filter type must be specified
|
||||
raise ValueError("Please choose a type of fliter")
|
||||
|
||||
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
|
||||
"""
|
||||
:param data: Input signal
|
||||
:param M: Length of structural element
|
||||
:return: Signal after filter
|
||||
"""
|
||||
if not data.any():
|
||||
raise ValueError("The input data is None, please given real value data")
|
||||
g = np.ones(M)
|
||||
Data_pre = np.insert(data, 0, np.zeros(M))
|
||||
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
|
||||
# Opening: erosion + dilation
|
||||
out1 = Eorde(Data_pre, len(Data_pre), g, M)
|
||||
out2 = Dilate(out1, len(out1), g, M)
|
||||
out2 = np.insert(out2, 0, np.zeros(M - 2))
|
||||
# Closing: dilation + erosion
|
||||
out5 = Dilate(Data_pre, len(Data_pre), g, M)
|
||||
out6 = Eorde(out5, len(out5), g, M)
|
||||
out6 = np.insert(out6, 0, np.zeros(M - 2))
|
||||
|
||||
baseline = (out2 + out6) / 2
|
||||
# ------------------------- keep the residual (signal minus baseline) ------------------------
|
||||
data_filtered = Data_pre[:len(baseline)] - baseline
|
||||
data_filtered = data_filtered[M: M + len(data)]
|
||||
baseline = baseline[M:]
|
||||
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
|
||||
baseline[-1] = baseline[-2] = baseline[-3]
|
||||
if get_bre:
|
||||
return data_filtered, baseline
|
||||
else:
|
||||
return data_filtered
|
||||
|
||||
def Iirnotch(self, data=None, cut_fre=50, quality=3):
|
||||
"""陷波器"""
|
||||
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
|
||||
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
|
||||
"""
|
||||
Chebyshev filter
|
||||
:param data: Input signal
|
||||
:param rp: The maximum ripple allowed
|
||||
:param type: 'lowpass', 'bandpass, 'highpass'
|
||||
:param low_cut: Low cut-off fre
|
||||
:param high_cut: High cut-off fre
|
||||
:param order: The order of filter
|
||||
:return: Signal after filter
|
||||
"""
|
||||
if type == 'lowpass':
|
||||
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == 'bandpass':
|
||||
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == 'highpass':
|
||||
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
else:
|
||||
raise ValueError("The type of filter is None, please given the real value!")
|
||||
|
||||
def Envelope(self, data):
|
||||
"""取信号包络"""
|
||||
if len(data) <= 1: raise ValueError("Wrong input data")
|
||||
hx = fftpack.hilbert(data)
|
||||
# np.sqrt takes a single array; the envelope is sqrt(hilbert(x)^2 + x^2)
return np.sqrt(hx ** 2 + data ** 2)
|
||||
|
||||
def wavelet_trans(self, data,c_level=['aaa','aad'], wavelet='db4', mode='symmetric',maxlevel=10):
|
||||
wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
|
||||
new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
|
||||
for c in c_level :
|
||||
new_wp[c] = wp[c]
|
||||
return new_wp.reconstruct()
|
||||
|
||||
# def em_decomposition(self, data):
|
||||
# from pyhht.emd import EMD
|
||||
# return EMD(data).decompose()
|
||||
|
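A small sketch of how BCG_Operation is driven elsewhere in this experiment (mirroring my_augment.py; the input below is synthetic): set the sample rate, notch out mains interference, low-pass at 20 Hz, then split into a respiratory band and a BCG band.

import numpy as np
from utils.Preprocessing import BCG_Operation

op = BCG_Operation(sample_rate=100)
raw = np.random.randn(30 * 100) * 100 + 1800           # 30 s of synthetic 100 Hz signal

clean = op.Iirnotch(raw)                                # 50 Hz notch
clean = op.Butterworth(clean, "lowpass", low_cut=20, order=6)
resp_band = op.Butterworth(clean, "lowpass", low_cut=0.7, order=6)   # respiration
bcg_band = op.Butterworth(clean, "highpass", high_cut=1, order=6)    # heartbeat/BCG band
print(resp_band.shape, bcg_band.shape)                  # both (3000,)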
exam/004/utils/calc_metrics.py (new file)
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:calc_metrics.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/02/12
|
||||
"""
|
||||
import torch
|
||||
import torchmetrics
|
||||
|
||||
|
||||
class CALC_METRICS:
|
||||
metrics = []  # note: defined at class level, so every CALC_METRICS instance shares the same list
|
||||
nc = 0
|
||||
|
||||
def __init__(self, nc):
|
||||
self.nc = nc
|
||||
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
|
||||
self.valid_result = self.train_result = None
|
||||
|
||||
def update(self, pred, target):
|
||||
for part1 in self.metrics:
|
||||
part1.update(pred.cpu(), target.cpu())
|
||||
|
||||
def compute(self):
|
||||
result = []
|
||||
for part1 in self.metrics:
|
||||
result.append(part1.compute())
|
||||
|
||||
def reset(self):
|
||||
for part1 in self.metrics:
|
||||
part1.reset()
|
||||
|
||||
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
|
||||
temp_result = []
|
||||
for j in self.metrics:
|
||||
compute_result = (j.compute().cpu().numpy() * 100).tolist()
|
||||
temp_result.append(compute_result)
|
||||
|
||||
if epoch_type == "train":
|
||||
self.train_result = [loss] + temp_result
|
||||
elif epoch_type == "valid":
|
||||
self.valid_result = [loss] + temp_result
|
||||
else:
|
||||
pass
|
||||
|
||||
a = ""
|
||||
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
|
||||
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
|
||||
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
|
||||
return a
|
||||
|
||||
def wandb_log(self, wandb=None, cur_lr=None):
|
||||
if wandb is None:
|
||||
return
|
||||
|
||||
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
|
||||
dict_key = []
|
||||
for epoch_type in ["train", "valid"]:
|
||||
dict_key.append(epoch_type + "/" + "loss")
|
||||
for i in keyword:
|
||||
dict_key.append(epoch_type + "/" + i)
|
||||
|
||||
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
|
||||
log_dict["lr"] = cur_lr
|
||||
wandb.log(log_dict)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
|
||||
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
|
||||
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
|
||||
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
|
||||
pred = torch.tensor(pred).cuda()
|
||||
true = torch.tensor(true).cuda()
|
||||
calc_metrics = CALC_METRICS(1)
|
||||
calc_metrics.update(pred, true)
|
||||
print(calc_metrics.get_matrix())
|
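In the training and analysis scripts the metrics are updated with column vectors (the positive-class probability and the integer label, both of shape (N, 1)); a short sketch of that calling pattern, assuming the same torchmetrics version used by this repo:

import torch
from utils.calc_metrics import CALC_METRICS

calc_metrics = CALC_METRICS(1)
probs = torch.tensor([[0.1], [0.8], [0.6], [0.3]])   # P(apnea) per window
labels = torch.tensor([[0], [1], [1], [0]])
calc_metrics.update(probs, labels)
calc_metrics.compute()
print(calc_metrics.get_matrix(loss=0.42, epoch=0, epoch_type="test"))
calc_metrics.reset()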
exam/005/generate_label_14.py (new file)
@@ -0,0 +1,561 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:generate_label_14.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/09/05
|
||||
"""
|
||||
# 14.0
# manually balance the class counts

# 13.0
# restrict to a subset of the recordings for a first test

# 12.0
# cap segments containing unusable events; treat unusable segments as background and do not record the event

# 10.0
# use the signal with poor-quality stretches removed

# 9.0
# add the latest quality labels (not used yet)

# 8.0
# generate labels for every event type except hypopnea

# a 2 s step was tried: windows shorter than 2 s get discarded, which causes more misclassification, though that part could still be included
# current scheme: 30 s windows moved 1 s at a time; all apneas labelled 1, hypopnea 0, normal 0

# preprocessing: a 50 Hz notch filter to remove mains interference plus a 20 Hz low-pass (the 20 Hz cutoff depends on the BCG spectrum)

# first remove extreme values,
# i.e. samples above the upper baseline or below the lower baseline
# type1: average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000

# too many such segments would shift the mean
# TODO
# add body-movement labels and compute the mean excluding movement

# finally downsample to 100 Hz
|
||||
|
||||
import time
|
||||
import logging
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import yaml
|
||||
from pathos import multiprocessing
|
||||
from tqdm import tqdm
|
||||
|
||||
# 数据集 和 标签 位置
|
||||
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
|
||||
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")
|
||||
|
||||
# BCG 记录开始时间
|
||||
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
|
||||
dtype=object)
|
||||
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))
|
||||
|
||||
# 读取每个数据集路径
|
||||
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
|
||||
all_numpy_dataset.sort()
|
||||
|
||||
# 划分后的数据集保存路径
|
||||
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
|
||||
dataset_save_path = Path(r"./dataset/")
|
||||
dataset_save_path.mkdir(exist_ok=True)
|
||||
|
||||
# 设置日志
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.NOTSET)
|
||||
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
|
||||
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
|
||||
fh.setLevel(logging.NOTSET)
|
||||
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
|
||||
fh.setFormatter(logging.Formatter("%(message)s"))
|
||||
logger.addHandler(fh)
|
||||
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(logging.NOTSET)
|
||||
ch.setFormatter(logging.Formatter("%(message)s"))
|
||||
logger.addHandler(ch)
|
||||
|
||||
# all_label = []
|
||||
# 输出统计数据标题栏
|
||||
# logger.info("sampNo".center(8) +
|
||||
# "hpy_num".center(8) + "hpy_time".center(10) +
|
||||
# "csa_num".center(8) + "csa_time".center(10) +
|
||||
# "osa_num".center(8) + "osa_time".center(10) +
|
||||
# "msa_num".center(8) + "msa_time".center(10)
|
||||
# )
|
||||
|
||||
logger.info("sampNo".center(8) + ',' +
|
||||
"train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
|
||||
"valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
|
||||
"test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
|
||||
"train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
|
||||
)
|
||||
|
||||
base_random_seed = 42
|
||||
|
||||
window_second = 30
|
||||
step_second = 1
|
||||
|
||||
valid_ratio = 0.1
|
||||
test_ratio = 0.1
|
||||
|
||||
normal_event_quality_label = 0
|
||||
# valid_ratio = 5000
|
||||
# test_ratio = 10000
|
||||
|
||||
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
|
||||
valid_ratio > 1 and valid_ratio > 1), "验证集与测试集输入应同时为比例或数量"
|
||||
|
||||
# dataset sampNo for test
|
||||
with open("./settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader)
|
||||
select_dataset = hyp["select_sampno"]
|
||||
|
||||
# segments that must be zeroed out; the unusable prefix is not always zero padding and can run to tens of thousands of samples
|
||||
disable_segment = {
|
||||
'221': [[0, 10000]],
|
||||
'670': [[0, 20000]],
|
||||
'683': [[0, 20000]],
|
||||
'704': [[0, 26000]],
|
||||
'726': [[0, 20000]],
|
||||
'736': [[0, 47000]],
|
||||
'933': [[0, 773560]],
|
||||
'935': [[0, 26600]],
|
||||
'952': [[0, 17000]],
|
||||
'955': [[0, 78000]],
|
||||
'961': [[0, 107000]],
|
||||
'962': [[0, 15100]],
|
||||
'966': [[0, 13120]],
|
||||
'967': [[0, 44000]],
|
||||
'1006': [[0, 60000]],
|
||||
'1009': [[0, 1000]],
|
||||
'1010': [[0, 49000]],
|
||||
'1296': [[0, 27000]],
|
||||
'1300': [[0, 33800]],
|
||||
'1301': [[0, 14000]],
|
||||
'1302': [[0, 5600]],
|
||||
'1374': [[0, 1000]],
|
||||
'1478': [[0, 998000]],
|
||||
|
||||
}
|
||||
|
||||
|
||||
# 生成数据集主函数
|
||||
def generate_label(No, dataset_path):
|
||||
"""
|
||||
:param dataset_path: 数据集路径
|
||||
:return:
|
||||
"""
|
||||
|
||||
# 获取数据编号
|
||||
sampNo = dataset_path.stem.split("samp")[0]
|
||||
# 标签路径
|
||||
label_path = bcg_label_path / f"export{sampNo}_all.csv"
|
||||
|
||||
if not label_path.exists():
|
||||
raise FileNotFoundError(f"{label_path} not exist")
|
||||
|
||||
if not dataset_path.exists():
|
||||
raise Exception(f"{dataset_path} not exists")
|
||||
|
||||
# 加载数据集
|
||||
select_numpy = np.load(dataset_path)
|
||||
select_numpy_len = len(select_numpy)
|
||||
|
||||
# 开头不合理片段置零
|
||||
if sampNo in disable_segment.keys():
|
||||
for sp, ep in disable_segment[sampNo]:
|
||||
select_numpy[sp:ep] = 0
|
||||
|
||||
# 剔除质量差信号
|
||||
if sampNo == "670":
|
||||
select_numpy = select_numpy[:17195 * 100]
|
||||
|
||||
# 获取前面补了多少0
|
||||
not_zero_point = 0
|
||||
for num in select_numpy:
|
||||
if num > 10:
|
||||
break
|
||||
not_zero_point += 1
|
||||
not_zero_point //= 100
|
||||
|
||||
# 读取标签
|
||||
label_csv = pd.read_csv(label_path, encoding='gbk')
|
||||
label_csv["new_start"] = label_csv["new_start"].astype("int")
|
||||
label_csv["new_end"] = label_csv["new_end"].astype("int")
|
||||
label_csv["Duration"] = label_csv["Duration"].astype("int")
|
||||
label_csv["new_label"] = label_csv["new_label"].fillna("2")
|
||||
label_csv["new_label"] = label_csv["new_label"].astype("int")
|
||||
# 剔除质量不好的样本
|
||||
# drop_csv = label_csv[
|
||||
# (label_csv["Event type"].isin(["Central apnea", "Obstructive apnea"])) & (label_csv["new_label"] == 2)]
|
||||
# label_csv = label_csv.drop(drop_csv.index)
|
||||
|
||||
# 事件片段与背景片段, 每个背景长度均为设定窗长
|
||||
segment_labels = []
|
||||
negative_labels = []
|
||||
hpy_num = csa_num = osa_num = msa_num = 0
|
||||
hpy_time = csa_time = osa_time = msa_time = 0
|
||||
|
||||
# 遍历全部事件并统计
|
||||
for i in range(len(label_csv)):
|
||||
# 进行LabelEncoder
|
||||
label = label_csv.iloc[i, :]
|
||||
# 如果事件在补零片段,则不添加到事件列表
|
||||
if label["new_end"] < not_zero_point:
|
||||
continue
|
||||
|
||||
if sampNo == "670" and label["new_start"] > 17195:
|
||||
continue
|
||||
|
||||
if label["new_end"] - label["new_start"] < 10:
|
||||
continue
|
||||
|
||||
# 将事件添加到事件列表
|
||||
if label["Event type"] == "Hypopnea":
|
||||
label_type = 1
|
||||
hpy_num += 1
|
||||
hpy_time += label["new_end"] - label["new_start"]
|
||||
|
||||
# 将低通气添加到背景 好像不用专门加入到负样本事件中?
|
||||
# negative_labels.append(
|
||||
# [sampNo, i, label_type, normal_event_quality_label, label["new_start"], label["new_end"]])
|
||||
continue
|
||||
elif label["Event type"] == "Central apnea":
|
||||
label_type = 2
|
||||
csa_num += 1
|
||||
csa_time += label["new_end"] - label["new_start"]
|
||||
elif label["Event type"] == "Obstructive apnea":
|
||||
label_type = 3
|
||||
osa_num += 1
|
||||
osa_time += label["new_end"] - label["new_start"]
|
||||
# MSA 认为是OSA
|
||||
elif label["Event type"] == "Mixed apnea":
|
||||
label_type = 3
|
||||
msa_num += 1
|
||||
msa_time += label["new_end"] - label["new_start"]
|
||||
else:
|
||||
continue
|
||||
# label_type = 0
|
||||
if label["new_end"] - label["new_start"] > label["Duration"] + 20:
|
||||
print(sampNo, label)
|
||||
|
||||
# 格式为 样本编号 第几个事件 标签 开始事件 结束事件
|
||||
segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])
|
||||
|
||||
# logger.info(sampNo.center(8) +
|
||||
# str(hpy_num).center(8) + str(hpy_time).center(10) +
|
||||
# str(csa_num).center(8) + str(csa_time).center(10) +
|
||||
# str(osa_num).center(8) + str(osa_time).center(10) +
|
||||
# str(msa_num).center(8) + str(msa_time).center(10))
|
||||
|
||||
# 设置随机树种子
|
||||
random_seed = base_random_seed + int(sampNo)
|
||||
|
||||
# 对于无事件的样本,直接将所有的片段
|
||||
if len(segment_labels) == 0:
|
||||
# 剔除补零片段(把开始点移动到补零结束)
|
||||
normal_SP = not_zero_point
|
||||
# 开头至少满足一个窗长
|
||||
if normal_SP < window_second:
|
||||
normal_SP = window_second
|
||||
|
||||
# 结束点为样本总长 除以 采样率
|
||||
normal_EP = select_numpy_len // 100
|
||||
label_type = 0
|
||||
# 正常时间编号为秒数 除以 30,即epoch
|
||||
negative_labels += [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1,
|
||||
SP1 + window_second] for SP1 in
|
||||
range(normal_SP - window_second + step_second, normal_EP - window_second + step_second,
|
||||
window_second)]
|
||||
|
||||
# 对于有事件的样本
|
||||
# 遍历事件,获取事件之间的背景片段
|
||||
for index in range(len(segment_labels) + 1):
|
||||
# 前一个事件的结尾 与 下一事件开头即为背景
|
||||
# 对于开头的无事件片段,则设定开始事件为0
|
||||
if index == 0:
|
||||
normal_SP = 0
|
||||
# 非开头片段,开始点为上一个事件的结尾
|
||||
else:
|
||||
# 加一秒 作为 缓冲
|
||||
normal_SP = segment_labels[index - 1][-1] + 1
|
||||
|
||||
# 最后一个事件则取到样本片段结尾
|
||||
if index == len(segment_labels):
|
||||
normal_EP = select_numpy_len // 100 - window_second
|
||||
# 否则结束事件取 本事件开头
|
||||
else:
|
||||
# 减一秒 作为 缓冲
|
||||
normal_EP = segment_labels[index][-2] - 1
|
||||
|
||||
# 剔除包含开头补零的片段
|
||||
if normal_EP < not_zero_point:
|
||||
continue
|
||||
|
||||
# 剔除开头不足30s的正常片段
|
||||
if normal_SP < window_second:
|
||||
continue
|
||||
label_type = 0
|
||||
|
||||
# 将背景事件按照滑窗距离逐个加入到背景事件中
|
||||
temp_1 = [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1, SP1 + window_second]
|
||||
for SP1 in range(normal_SP - window_second, normal_EP - window_second, window_second)]
|
||||
|
||||
negative_labels += temp_1
|
||||
|
||||
train_label, valid_label, test_label = [], [], []
|
||||
# assert (valid_ratio + test_ratio) < 1 <= len(segment_labels), f"{sampNo}训练集与测试集数量应小于总数据集数量{len(segment_labels)}"
|
||||
|
||||
# 对于测试数据全部直接保存
|
||||
if int(sampNo) in select_dataset:
|
||||
event_label = np.zeros(select_numpy_len // 100)
|
||||
quality_label = np.zeros(select_numpy_len // 100)
|
||||
# 用于存储事件标签
|
||||
for PN, segmentNo, label_type, new_label, SP, EP in segment_labels:
|
||||
event_label[SP:EP] = label_type
|
||||
|
||||
test_label = []
|
||||
# 剔除补零片段
|
||||
normal_SP = not_zero_point
|
||||
if normal_SP < window_second:
|
||||
normal_SP = window_second
|
||||
normal_EP = select_numpy_len // 100
|
||||
|
||||
# 分成指定窗长的滑窗片段
|
||||
test_label += [
|
||||
[sampNo, SP1 // 30, int(event_label[SP1 + window_second - step_second]),
|
||||
int(quality_label[SP1 + window_second - step_second]),
|
||||
SP1, SP1 + window_second] for SP1 in range(normal_SP - window_second + step_second,
|
||||
normal_EP - window_second + step_second, step_second)]
|
||||
|
||||
logger.info(sampNo.center(8) + ',' +
|
||||
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
|
||||
str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
|
||||
str(len(test_label)).center(10) + ',' +
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
|
||||
',' + str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
|
||||
10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
|
||||
)
|
||||
|
||||
df2.loc[No] = [sampNo,
|
||||
str(0), str(0), str(0),
|
||||
str(0), str(0), str(0),
|
||||
str(len(test_label)),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
|
||||
str(0), str(0), str(len(segment_labels))]
|
||||
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_all_{window_second}s_sa_test2_label.npy",
|
||||
# np.array(test_label))
|
||||
df1 = pd.DataFrame(data=test_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
|
||||
index=False)
|
||||
|
||||
train_label, valid_label, test_label = [], [], []
|
||||
# 对于训练与验证集样本
|
||||
if True:
|
||||
# 打乱片段顺序
|
||||
np.random.seed(random_seed)
|
||||
np.random.shuffle(segment_labels)
|
||||
np.random.shuffle(negative_labels)
|
||||
|
||||
# 获取训练集、验证集、测试集分到事件个数
|
||||
if 0 < valid_ratio < 1:
|
||||
train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
|
||||
valid_segment_num = int(len(segment_labels) * (1 - test_ratio))
|
||||
else:
|
||||
train_segment_num = len(segment_labels) - valid_ratio - test_ratio
|
||||
valid_segment_num = valid_ratio
|
||||
|
||||
# 分别将各事件切分为30s, 步进1秒的片段
|
||||
for index in range(train_segment_num):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
|
||||
train_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(SP - window_second, EP - window_second + step_second, step_second)]
|
||||
|
||||
for index in range(train_segment_num, valid_segment_num):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
|
||||
valid_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(SP - window_second, EP - window_second + step_second, step_second)]
|
||||
|
||||
for index in range(valid_segment_num, len(segment_labels)):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
|
||||
test_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(SP - window_second, EP - window_second + step_second, step_second)]
|
||||
|
||||
# 计算片段和事件个数
|
||||
train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
|
||||
train_eve, valid_eve, test_eve = train_segment_num, (valid_segment_num - train_segment_num), (
|
||||
len(segment_labels) - valid_segment_num)
|
||||
|
||||
# 数据集补偿
|
||||
# if train_num < 300:
|
||||
# train_num = 300 - train_num
|
||||
#
|
||||
# if valid_num < 300:
|
||||
# valid_num = 300 - valid_num
|
||||
#
|
||||
# if test_num < 300:
|
||||
# test_num = 300 - test_num
|
||||
|
||||
# 获取训练集、验证集、测试集分到背景个数
|
||||
# if 0 < valid_ratio < 1:
|
||||
# train_eve2 = int(len(negative_labels) * (1 - valid_ratio - test_ratio))
|
||||
# valid_eve2 = int(len(negative_labels) * valid_ratio)
|
||||
# else:
|
||||
# train_eve2 = len(negative_labels) - valid_ratio - test_ratio
|
||||
# valid_eve2 = valid_ratio
|
||||
#
|
||||
# test_eve2 = len(negative_labels) - train_eve2 - valid_eve2
|
||||
# 直接补充到足够个数的背景事件
|
||||
train_eve2 = max(train_eve, 300)
|
||||
valid_eve2 = max(valid_eve, 40)
|
||||
test_eve2 = max(test_eve, 40)
|
||||
|
||||
# 强制背景数量
|
||||
# train_eve2 = train_eve
|
||||
# valid_eve2 = valid_eve
|
||||
# test_eve2 = test_eve
|
||||
|
||||
# 添加背景事件数量
|
||||
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[:train_eve2]:
|
||||
label_type = int(label_type)
|
||||
train_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
|
||||
in
|
||||
range(normal_SP, normal_EP, step_second)]
|
||||
|
||||
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
|
||||
train_eve2: train_eve2 + valid_eve2]:
|
||||
label_type = int(label_type)
|
||||
valid_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1
|
||||
in
|
||||
range(normal_SP, normal_EP, step_second)]
|
||||
|
||||
for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
|
||||
train_eve2 + valid_eve2:train_eve2 + valid_eve2 + test_eve2]:
|
||||
label_type = int(label_type)
|
||||
test_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second] for SP1 in
|
||||
range(normal_SP, normal_EP, step_second)]
|
||||
|
||||
logger.info(sampNo.center(8) + ',' +
|
||||
str(len(train_label)).center(10) + ',' +
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(len(valid_label)).center(10) + ',' +
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(len(test_label)).center(10) + ',' +
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
|
||||
10) + ',' +
|
||||
str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
|
||||
)
|
||||
|
||||
df2.loc[No] = [sampNo.center(8),
|
||||
str(len(train_label)),
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
|
||||
str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
|
||||
str(len(valid_label)),
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
|
||||
str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
|
||||
str(len(test_label)),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
|
||||
str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
|
||||
str(train_eve), str(valid_eve), str(test_eve).center(10)]
|
||||
|
||||
def label_check(label_list):
|
||||
temp_list = []
|
||||
for sampNo, index, label_type, new_label, SP, EP in label_list:
|
||||
if EP - SP < window_second:
|
||||
print(sampNo, index, label_type, SP, EP)
|
||||
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
|
||||
|
||||
if SP < 0:
|
||||
print(sampNo, index, label_type, SP, EP)
|
||||
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
|
||||
|
||||
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
|
||||
print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
|
||||
temp_list.append([sampNo, index, label_type, new_label, SP, EP])
|
||||
|
||||
for j in temp_list:
|
||||
label_list.remove(j)
|
||||
|
||||
label_check(train_label)
|
||||
label_check(valid_label)
|
||||
label_check(test_label)
|
||||
for sampNo, index, label_type, new_label, SP, EP in train_label:
|
||||
if EP - SP < window_second:
|
||||
print(sampNo, index, label_type, new_label, SP, EP)
|
||||
|
||||
if SP < 0:
|
||||
print(sampNo, index, label_type, new_label, SP, EP)
|
||||
|
||||
if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
|
||||
print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))
|
||||
|
||||
df1 = pd.DataFrame(data=train_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
|
||||
index=False)
|
||||
|
||||
df1 = pd.DataFrame(data=valid_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
|
||||
index=False)
|
||||
|
||||
df1 = pd.DataFrame(data=test_label,
|
||||
columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
|
||||
df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
|
||||
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.npy",
|
||||
# np.array(train_label))
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.npy",
|
||||
# np.array(valid_label))
|
||||
# np.save(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.npy",
|
||||
# np.array(test_label))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pool = multiprocessing.Pool(processes=44)
|
||||
# pool.map(generate_label, list(all_numpy_dataset))
|
||||
# pool.close()
|
||||
# pool.join()
|
||||
|
||||
df2 = pd.DataFrame(data=None,
|
||||
columns=["sampNo",
|
||||
"train_num", "train_P", "train_N",
|
||||
"valid_num", "valid_P", "valid_N",
|
||||
"test_num", "test_P", "test_N",
|
||||
"train_eve", "valid_eve", "test_eve"])
|
||||
|
||||
temp = []
|
||||
for one_dataset in all_numpy_dataset:
|
||||
if int(one_dataset.stem.split("samp")[0]) in [*select_dataset]:
|
||||
temp.append(one_dataset)
|
||||
# for one_dataset in temp:
|
||||
# all_numpy_dataset.remove(one_dataset)
|
||||
|
||||
for No, one_dataset in enumerate(temp):
|
||||
generate_label(No, one_dataset)
|
||||
|
||||
df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)
|
||||
# generate_label(all_numpy_dataset[0])
|
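The heart of the label generation is this window expansion: each event (and each background stretch) becomes rows of [sampNo, segment id, label_type, new_label, SP1, SP1 + 30] with a 1 s step, so a window is counted as positive whenever its right edge still falls inside the event. A standalone sketch with one hypothetical OSA event:

window_second, step_second = 30, 1

# hypothetical OSA event: sampNo "282", event index 5, label_type 3, new_label 1, from 120 s to 145 s
sampNo, segmentNo, label_type, new_label, SP, EP = "282", 5, 3, 1, 120, 145

rows = [[sampNo, segmentNo, label_type, new_label, SP1, SP1 + window_second]
        for SP1 in range(SP - window_second, EP - window_second + step_second, step_second)]

print(len(rows))          # 26 windows with start points 90 .. 115
print(rows[0], rows[-1])  # ['282', 5, 3, 1, 90, 120] ... ['282', 5, 3, 1, 115, 145]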
exam/005/load_dataset.py (new file)
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:load_dataset.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2021/12/03
|
||||
"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import torch.utils.data
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
from utils.Preprocessing import BCG_Operation
|
||||
|
||||
preprocessing = BCG_Operation()
|
||||
preprocessing.sample_rate = 100
|
||||
"""
|
||||
1. 读取方法
|
||||
# 无论是否提前切分,均提前转成npy格式
|
||||
# 1.1 提前预处理,切分好后生成npy,直接载入切分好的片段 内存占用多 读取简单
|
||||
使用此方法: 1.2 提前预处理,载入整夜数据,切分好后生成csv或xls,根据片段读取 内存占用少 读取较为复杂
|
||||
"""
|
||||
|
||||
datasets = {}
|
||||
|
||||
|
||||
# cache recordings to avoid repeated reads
|
||||
def read_dataset(data_path, augment=None):
|
||||
data_path = Path(data_path)
|
||||
try:
|
||||
f = []
|
||||
if data_path.is_dir():
|
||||
dataset_list = list(data_path.rglob("*.npy"))
|
||||
dataset_list.sort()
|
||||
f += dataset_list
|
||||
elif data_path.is_file():
|
||||
raise Exception(f'dataset path should be a dir')
|
||||
else:
|
||||
raise Exception(f'{data_path} does not exist')
|
||||
except Exception as e:
|
||||
raise Exception(f'Error loading data from {data_path}: {e} \n')
|
||||
|
||||
print("loading dataset")
|
||||
for i in tqdm(f):
|
||||
select_dataset = np.load(i)
|
||||
select_dataset = preprocessing.Butterworth(select_dataset, "lowpass", low_cut=20, order=3)
|
||||
if augment is not None:
|
||||
select_dataset = augment(select_dataset)
|
||||
datasets[i.name.split("samp")[0]] = select_dataset
|
||||
|
||||
|
||||
# 用第二种方法读取
|
||||
class ApneaDataset(Dataset):
|
||||
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
|
||||
self.data_path = data_path
|
||||
self.label_path = label_path
|
||||
self.segment_augment = segment_augment
|
||||
self.labels = None
|
||||
self.dataset_type = dataset_type
|
||||
self.select_sampNo = select_sampno
|
||||
|
||||
# self._getAllData()
|
||||
self._getAllLabels()
|
||||
|
||||
def __getitem__(self, index):
|
||||
# PN patience number
|
||||
# SP/EP start point, end point
|
||||
# temp_label.append([sampNo, label[-1], i, hpy_num, csa_num, osa_num, mean_low, flow_low])
|
||||
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
|
||||
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
|
||||
|
||||
if isinstance(datasets, dict):
|
||||
dataset = datasets[str(PN)]
|
||||
segment = self.segment_augment(dataset, SP, EP)
|
||||
return (*segment, int(float(label_type) > 1))
|
||||
else:
|
||||
raise Exception(f'dataset read failure!')
|
||||
|
||||
def count_SA(self):
|
||||
return sum(self.labels[:, 3] > 1)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.labels)
|
||||
|
||||
def _getAllLabels(self):
|
||||
label_path = Path(self.label_path)
|
||||
if not label_path.exists():
|
||||
raise Exception(f'{self.label_path} does not exist')
|
||||
|
||||
try:
|
||||
f = []
|
||||
if label_path.is_dir():
|
||||
if self.dataset_type == "train":
|
||||
label_list = list(label_path.rglob("*_train_label.csv"))
|
||||
elif self.dataset_type == "valid":
|
||||
label_list = list(label_path.rglob("*_valid_label.csv"))
|
||||
elif self.dataset_type == "test":
|
||||
label_list = list(label_path.glob("*_sa_test_label.csv"))
|
||||
# label_list = list(label_path.rglob("*_test_label.npy"))
|
||||
elif self.dataset_type == "all_test":
|
||||
label_list = list(label_path.rglob("*_sa_all_label.csv"))
|
||||
else:
|
||||
raise ValueError("self.dataset type error")
|
||||
# label_list = list(label_path.rglob("*_label.npy"))
|
||||
label_list.sort()
|
||||
f += label_list
|
||||
elif label_path.is_file():
|
||||
raise Exception(f'dataset path should be a dir')
|
||||
else:
|
||||
raise Exception(f'{self.label_path} does not exist')
|
||||
except Exception as e:
|
||||
raise Exception(f'Error loading data from {self.label_path}: {e} \n')
|
||||
print("loading labels")
|
||||
for i in tqdm(f):
|
||||
if int(i.name.split("_")[0]) not in self.select_sampNo:
|
||||
continue
|
||||
|
||||
if self.labels is None:
|
||||
self.labels = pd.read_csv(i).to_numpy(dtype=int)
|
||||
else:
|
||||
labels = pd.read_csv(i).to_numpy(dtype=int)
|
||||
if len(labels) > 0:
|
||||
self.labels = np.concatenate((self.labels, labels))
|
||||
# self.labels = self.labels[:10000]
|
||||
print(f"{self.dataset_type} length is {len(self.labels)}")
|
||||
|
||||
|
||||
class TestApneaDataset(ApneaDataset):
|
||||
def __init__(self, data_path, label_path, dataset_type, select_sampno, segment_augment=None):
|
||||
super(TestApneaDataset, self).__init__(
|
||||
data_path=data_path,
|
||||
label_path=label_path,
|
||||
dataset_type=dataset_type,
|
||||
select_sampno=select_sampno,
|
||||
segment_augment=segment_augment
|
||||
)
|
||||
|
||||
def __getitem__(self, index):
|
||||
PN, segmentNo, label_type, SP, EP = self.labels[index]
|
||||
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
|
||||
|
||||
if isinstance(datasets, dict):
|
||||
segment = datasets[str(PN)][int(SP) * 100:int(EP) * 100].copy()
|
||||
if self.segment_augment is not None:
|
||||
segment = self.segment_augment(segment)
|
||||
return segment, int(float(label_type) > 1), PN, segmentNo, SP, EP
|
||||
else:
|
||||
raise Exception(f'dataset read failure!')
|
||||
|
||||
|
||||
class TestApneaDataset2(ApneaDataset):
|
||||
def __init__(self, data_path, label_path, select_sampno, dataset_type, segment_augment=None):
|
||||
super(TestApneaDataset2, self).__init__(
|
||||
data_path=data_path,
|
||||
label_path=label_path,
|
||||
dataset_type=dataset_type,
|
||||
segment_augment=segment_augment,
|
||||
select_sampno=select_sampno
|
||||
)
|
||||
|
||||
def __getitem__(self, index):
|
||||
PN, segmentNo, label_type, new_label, SP, EP = self.labels[index]
|
||||
# PN, label, SP, EP, hpy_num, csa_num, osa_num, mean_low, flow_low = self.labels[index]
|
||||
|
||||
if isinstance(datasets, dict):
|
||||
dataset = datasets[str(PN)]
|
||||
segment = self.segment_augment(dataset, SP, EP)
|
||||
return (*segment, int(float(label_type) > 1), PN, segmentNo, label_type, new_label, SP, EP)
|
||||
else:
|
||||
raise Exception(f'dataset read failure!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
284
exam/005/main.py
Normal file
284
exam/005/main.py
Normal file
@ -0,0 +1,284 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@time:2021/10/15
|
||||
"""
|
||||
import os
|
||||
|
||||
import yaml
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import time
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
import torch.cuda
|
||||
from tqdm import tqdm
|
||||
from torchinfo import summary
|
||||
from load_dataset import ApneaDataset, read_dataset
|
||||
|
||||
from torch import nn
|
||||
from utils.calc_metrics import CALC_METRICS
|
||||
from sklearn.model_selection import KFold
|
||||
from model.Hybrid_Net001 import HYBRIDNET001
|
||||
# from utils.LossFunction import Foca1lLoss
|
||||
from my_augment import my_augment, my_segment_augment
|
||||
# 加载配置
|
||||
with open("./settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader)
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = hyp["GPU"]
|
||||
os.environ["WANDB_MODE"] = "dryrun"
|
||||
|
||||
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
|
||||
|
||||
# 读取地址参数
|
||||
data_path = hyp["Path"]["dataset"]
|
||||
label_path = hyp["Path"]["label"]
|
||||
|
||||
save_dir = Path(hyp["Path"]["save"]) / (Path(hyp["Path"]["save"]).name + "_" + realtime)
|
||||
save_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 设置日志
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.NOTSET)
|
||||
fh = logging.FileHandler(save_dir / (realtime + ".log"), mode='a')
|
||||
fh.setLevel(logging.NOTSET)
|
||||
fh.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
|
||||
logger.addHandler(fh)
|
||||
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(logging.NOTSET)
|
||||
ch.setFormatter(logging.Formatter("%(asctime)s: %(message)s"))
|
||||
logger.addHandler(ch)
|
||||
logging.getLogger('matplotlib.font_manager').disabled = True
|
||||
logger.info("------------------------------------")
|
||||
logger.info('hyper_parameters: ' + ', '.join(f'{k}={v}\n' for k, v in hyp.items()))
|
||||
|
||||
# 备份配置
|
||||
with open(save_dir / 'settings.yaml', 'w') as f:
|
||||
yaml.dump(hyp, f, sort_keys=False)
|
||||
|
||||
# Hyper-parameters
|
||||
gpu = torch.cuda.is_available()
|
||||
epochs = hyp["epoch"]
|
||||
lr = hyp["lr"]
|
||||
nc = hyp["nc"]
|
||||
bs = hyp["batch_size"]
|
||||
worker = hyp["number_worker"]
|
||||
select_sampno = hyp["select_sampno"]
|
||||
|
||||
read_dataset(data_path, augment=my_augment)
|
||||
calc_metrics = CALC_METRICS(nc)
|
||||
|
||||
|
||||
# 开始训练
|
||||
# 训练
|
||||
def model_train(model, train_loader, optimizer, scheduler, loss_func, training_state):
|
||||
model.train()
|
||||
train_loss = 0.0
|
||||
optimizer.zero_grad()
|
||||
|
||||
pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=80)
|
||||
pbar.set_description(training_state)
|
||||
for i, (resp, stft, labels) in pbar:
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
|
||||
# 所以输入为【batch_size, 1, 3000】 3000 = 30秒 * 100Hz
|
||||
# segments = segments.view(len(segments), 1, -1)
|
||||
|
||||
out = model(resp, stft)
|
||||
out = out.squeeze(-1)
|
||||
loss = loss_func(out, labels.float())
|
||||
optimizer.zero_grad()
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
# 余弦退火传入变量
|
||||
# scheduler.step(epoch + i / len(train_loader.dataset))
|
||||
# 自适应调整传入变量
|
||||
scheduler.step(loss)
|
||||
|
||||
loss_value = loss.item()
|
||||
train_loss += loss_value
|
||||
# cur_lr = optimizer.param_groups[-1]['lr']
|
||||
# labels = torch.unsqueeze(labels, dim=1)
|
||||
# out = F.softmax(out, dim=1)
|
||||
# out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
# if i % 20 == 0:
|
||||
# pbar.write(calc_metrics.get_matrix(loss=loss_value, cur_lr=cur_lr, epoch=epoch))
|
||||
|
||||
cur_lr = optimizer.param_groups[-1]['lr']
|
||||
train_loss /= len(train_loader)
|
||||
|
||||
calc_metrics.compute()
|
||||
logger.info("")
|
||||
logger.info("--------------------------------------")
|
||||
logger.info(training_state)
|
||||
logger.info(calc_metrics.get_matrix(loss=train_loss, epoch=epoch, epoch_type="train", cur_lr=cur_lr))
|
||||
calc_metrics.reset()
|
||||
|
||||
|
||||
def model_valid(model, valid_loader, wdir, loss_func):
|
||||
model.eval()
|
||||
valid_loss = 0.0
|
||||
for resp, stft, labels in valid_loader:
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
with torch.no_grad():
|
||||
# segments = F.normalize(segments)
|
||||
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
|
||||
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
|
||||
# segments = segments.view(len(segments), 1, -1)
|
||||
|
||||
out = model(resp, stft)
|
||||
out = out.squeeze(-1)
|
||||
# out = F.softmax(out, dim=1)
|
||||
loss = loss_func(out, labels.float())
|
||||
|
||||
valid_loss += loss.item()
|
||||
# labels = torch.unsqueeze(labels, dim=1)
|
||||
# out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
|
||||
valid_loss /= len(valid_loader)
|
||||
calc_metrics.compute()
|
||||
logger.info(calc_metrics.get_matrix(loss=valid_loss, epoch=epoch, epoch_type="valid"))
|
||||
global best_f1
|
||||
valid_f1 = calc_metrics.metrics[-1].compute()
|
||||
if valid_f1 > best_f1:
|
||||
best_f1 = valid_f1
|
||||
torch.save(model.state_dict(), wdir / f"best_{epoch}_{str(round(float(valid_f1), 3))}.pt")
|
||||
torch.save(model.state_dict(), wdir / f"best.pt")
|
||||
if wandb is not None:
|
||||
wandb.run.summary["best_f1"] = valid_f1
|
||||
calc_metrics.reset()
|
||||
|
||||
|
||||
def model_test(model, test_loader, loss_func):
|
||||
model.eval()
|
||||
test_loss = 0.0
|
||||
for resp, stft, labels in test_loader:
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
with torch.no_grad():
|
||||
# segments = F.normalize(segments)
|
||||
# segments = segments - torch.mean(segments, dim=1).view(-1, 1)
|
||||
# segments = F.normalize(segments - torch.mean(segments, dim=1).view(-1, 1))
|
||||
# segments = segments.view(len(segments), 1, -1)
|
||||
|
||||
out = model(resp, stft)
|
||||
out = out.squeeze(-1)
|
||||
# out = F.softmax(out, dim=1)
|
||||
loss = loss_func(out, labels.float())
|
||||
|
||||
test_loss += loss.item()
|
||||
# labels = torch.unsqueeze(labels, dim=1)
|
||||
# out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
|
||||
test_loss /= len(test_loader)
|
||||
calc_metrics.compute()
|
||||
logger.info(calc_metrics.get_matrix(loss=test_loss, epoch=epoch, epoch_type="test"))
|
||||
calc_metrics.reset()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
try:
|
||||
import wandb
|
||||
except ImportError:
|
||||
wandb = None
|
||||
prefix = 'wandb: '
|
||||
logger.info(f"{prefix}Install Weights & Biases logger with 'pip install wandb'")
|
||||
|
||||
if wandb is not None and wandb.run is None:
|
||||
wandb_run = wandb.init(
|
||||
config=hyp,
|
||||
name=save_dir.stem,
|
||||
project=hyp["project"],
|
||||
notes=hyp["Note"],
|
||||
tags=hyp["tags"],
|
||||
entity=hyp["entity"],
|
||||
)
|
||||
exam_name = Path("./").absolute().name
|
||||
|
||||
model_net = eval(hyp["model_name"])()
|
||||
model_net.initialize_weights()
|
||||
summary(model_net, [(1, 120, 1), (1, 1, 121, 26)])
|
||||
|
||||
time.sleep(3)
|
||||
if gpu:
|
||||
model_net.cuda()
|
||||
|
||||
k_folds = 5
|
||||
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
|
||||
|
||||
logger.info('--------------------------------')
|
||||
for fold, (train_ids, test_ids) in enumerate(kfold.split(select_sampno)):
|
||||
logger.info(f'Start FOLD {fold} / {k_folds}----------------------')
|
||||
train_set = [select_sampno[i] for i in train_ids]
|
||||
test_set = [select_sampno[i] for i in test_ids]
|
||||
logger.info(f'Train_Set:{train_set}')
|
||||
logger.info(f'Independent_Test_Set:{test_set}')
|
||||
|
||||
sub_save_dir = save_dir / f"KFold_{fold}"
|
||||
sub_save_dir.mkdir(exist_ok=True, parents=True)
|
||||
wdir = sub_save_dir / "weights"
|
||||
wdir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
hyp["train_set"] = train_set
|
||||
hyp["test_set"] = test_set
|
||||
with open(sub_save_dir / 'settings.yaml', 'w') as f:
|
||||
yaml.dump(hyp, f, sort_keys=False)
|
||||
|
||||
train_dataset = ApneaDataset(data_path, label_path, train_set, "train", my_segment_augment)
|
||||
valid_dataset = ApneaDataset(data_path, label_path, train_set, "valid", my_segment_augment)
|
||||
test_dataset = ApneaDataset(data_path, label_path, train_set, "test", my_segment_augment)
|
||||
|
||||
train_loader = DataLoader(train_dataset, batch_size=bs, pin_memory=True, num_workers=worker, shuffle=True)
|
||||
valid_loader = DataLoader(valid_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
|
||||
test_loader = DataLoader(test_dataset, batch_size=bs, pin_memory=True, num_workers=worker)
|
||||
|
||||
# 重新初始化模型
|
||||
del model_net
|
||||
model_net = eval(hyp["model_name"])()
|
||||
model_net.initialize_weights()
|
||||
if gpu:
|
||||
model_net.cuda()
|
||||
|
||||
logger.info(f"Weight is {[(len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]}")
|
||||
# 损失函数与优化器
|
||||
loss_function = nn.BCELoss(
|
||||
weight=torch.Tensor([(len(train_dataset) - train_dataset.count_SA()) / train_dataset.count_SA()]).cuda())
|
||||
|
||||
# loss_func = nn.BCEWithLogitsLoss()
|
||||
# loss_func = FocalLoss(class_num=nc, alpha=0.75, size_average="sum")
|
||||
|
||||
# momentum
|
||||
# nesterov 牛顿动量
|
||||
# weight_decay L2正则
|
||||
# optimizer = torch.optim.SGD(model_net.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=1e-6)
|
||||
|
||||
optimizer = torch.optim.Adam(model_net.parameters(), lr=lr)
|
||||
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=int(hyp["T_max"]),
|
||||
|
||||
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
|
||||
patience=2836, min_lr=1e-8,
|
||||
verbose=True)
|
||||
|
||||
# 参数记录
|
||||
best_f1 = 0
|
||||
|
||||
for epoch in range(epochs):
|
||||
model_train(model_net, train_loader, optimizer, scheduler, loss_function,
|
||||
f"EXAM:{exam_name} FOLD:{fold}/{k_folds} EPOCH:{epoch}/{epochs}")
|
||||
model_valid(model_net, valid_loader, wdir, loss_function)
|
||||
model_test(model_net, test_loader, loss_function)
|
||||
if wandb is not None:
|
||||
calc_metrics.wandb_log(wandb=wandb, cur_lr=optimizer.param_groups[-1]['lr'])
|
98
exam/005/model/Hybrid_Net001.py
Normal file
98
exam/005/model/Hybrid_Net001.py
Normal file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:andrew
|
||||
@file:Hybrid_Net001.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/09/30
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torchinfo import summary
|
||||
from torch import cat
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
|
||||
# 输入时长
|
||||
WHOLE_SEGMENT_SECOND = 30
|
||||
|
||||
# 呼吸采样率
|
||||
RESPIRATORY_FRE = 4
|
||||
|
||||
# BCG 时频图大小
|
||||
BCG_GRAPH_SIZE = (26, 121)
|
||||
|
||||
|
||||
class HYBRIDNET001(nn.Module):
|
||||
def __init__(self, num_classes=1, init_weights=True):
|
||||
super(HYBRIDNET001, self).__init__()
|
||||
|
||||
self.lstm = nn.LSTM(input_size=1,
|
||||
hidden_size=16,
|
||||
num_layers=1,
|
||||
bidirectional=True,
|
||||
batch_first=True)
|
||||
|
||||
self.right = nn.Sequential(
|
||||
nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3),
|
||||
stride=(1, 1), padding=(1, 1)),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
|
||||
nn.BatchNorm2d(16),
|
||||
|
||||
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3),
|
||||
stride=(1, 1), padding=(1, 1)),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
|
||||
nn.BatchNorm2d(32),
|
||||
|
||||
nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3),
|
||||
stride=(1, 1), padding=(1, 1)),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=(2, 2), padding=1),
|
||||
nn.BatchNorm2d(32)
|
||||
|
||||
|
||||
|
||||
)
|
||||
|
||||
self.classifier = nn.Sequential(
|
||||
# nn.Dropout(p=0.5),
|
||||
nn.Linear((120 * 32 + 32 * 16 * 4), 512),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(512, num_classes),
|
||||
nn.Sigmoid()
|
||||
)
|
||||
|
||||
if init_weights:
|
||||
self.initialize_weights()
|
||||
|
||||
def initialize_weights(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') # 何教授方法
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.Linear):
|
||||
nn.init.normal_(m.weight, 0, 0.01) # 正态分布赋值
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x1, x2):
|
||||
x1, (_, _) = self.lstm(x1)
|
||||
x2 = self.right(x2)
|
||||
# print(x1.shape)
|
||||
# print(x2.shape)
|
||||
x1 = torch.flatten(x1, start_dim=1)
|
||||
x2 = torch.flatten(x2, start_dim=1)
|
||||
x = torch.cat((x1, x2), dim=1)
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
model = HYBRIDNET001(1).cuda()
|
||||
summary(model, [(32, 120, 1), (32, 1, 121, 26)])
|
51
exam/005/my_augment.py
Normal file
51
exam/005/my_augment.py
Normal file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:my_augment.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/07/26
|
||||
"""
|
||||
from utils.Preprocessing import BCG_Operation
|
||||
import numpy as np
|
||||
from scipy.signal import stft
|
||||
|
||||
preprocessing = BCG_Operation()
|
||||
preprocessing.sample_rate = 100
|
||||
|
||||
|
||||
def my_augment(dataset):
|
||||
dataset = dataset - dataset.mean()
|
||||
dataset = preprocessing.Iirnotch(dataset)
|
||||
dataset = preprocessing.Butterworth(dataset, "lowpass", low_cut=20, order=6)
|
||||
dataset_low = preprocessing.Butterworth(dataset, "lowpass", low_cut=0.7, order=6)
|
||||
dataset_high = preprocessing.Butterworth(dataset, "highpass", high_cut=1, order=6)
|
||||
dataset = {"low": dataset_low,
|
||||
"high": dataset_high}
|
||||
return dataset
|
||||
|
||||
|
||||
def get_stft(x, fs, n):
|
||||
print(len(x))
|
||||
f, t, amp = stft(x, fs, nperseg=n)
|
||||
z = np.abs(amp.copy())
|
||||
return f, t, z
|
||||
|
||||
|
||||
def my_segment_augment(dataset, SP, EP):
|
||||
dataset_low = dataset["low"][int(SP) * 100:int(EP) * 100].copy()
|
||||
dataset_high = dataset["high"][int(SP) * 100:int(EP) * 100].copy()
|
||||
|
||||
dataset_low = dataset_low[::25]
|
||||
dataset_low = dataset_low.reshape(dataset_low.shape[0], 1)
|
||||
|
||||
_, _, dataset_high = stft(dataset_high, 100, nperseg=50)
|
||||
dataset_high = dataset_high.astype(np.float).T
|
||||
dataset_high = dataset_high.reshape(1, dataset_high.shape[0], dataset_high.shape[1])
|
||||
|
||||
return dataset_low, dataset_high
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
42
exam/005/settings.yaml
Normal file
42
exam/005/settings.yaml
Normal file
@ -0,0 +1,42 @@
|
||||
# environment config
|
||||
GPU: "1"
|
||||
|
||||
|
||||
# dataset config
|
||||
Path:
|
||||
dataset: /home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/
|
||||
label: ./dataset/
|
||||
save: ./output/
|
||||
|
||||
batch_size: 256
|
||||
number_worker: 0
|
||||
model_name: HYBRIDNET001
|
||||
select_sampno:
|
||||
- 282
|
||||
- 726
|
||||
- 229
|
||||
- 582
|
||||
- 286
|
||||
- 966
|
||||
- 1000
|
||||
- 1004
|
||||
- 1006
|
||||
- 1009
|
||||
|
||||
# train hyperparameters config
|
||||
epoch: 50
|
||||
lr: 0.00001
|
||||
nc: 1
|
||||
|
||||
# wandb config
|
||||
entity: "marques"
|
||||
project: "Sleep_Apnea_HYBRID00X"
|
||||
Note: "HYBRID001 NOBD STFT RESP DUAL"
|
||||
tags: ["ReduceLROnPlateau", "RESP LSTM", "STFT CNN"]
|
||||
|
||||
|
||||
# "CW":class_weight
|
||||
|
||||
|
||||
# "CosineAnnealingLR"
|
||||
# "ReduceLROnPlateau"
|
454
exam/005/test_save_result.py
Normal file
454
exam/005/test_save_result.py
Normal file
@ -0,0 +1,454 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:test_analysis.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/02/21
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
import torch.cuda
|
||||
import numpy as np
|
||||
import yaml
|
||||
from matplotlib import pyplot as plt
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from load_dataset import TestApneaDataset2, read_dataset
|
||||
from utils.Draw_ConfusionMatrix import draw_confusionMatrix
|
||||
from torch import nn
|
||||
from utils.calc_metrics import CALC_METRICS
|
||||
from my_augment import my_augment, my_segment_augment
|
||||
from model.Hybrid_Net001 import HYBRIDNET001
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
|
||||
exam_path = Path("./output/")
|
||||
|
||||
# 置信率阈值
|
||||
thresh = 0.5
|
||||
# 间隔最小距离
|
||||
thresh_event_interval = 2
|
||||
# 最小事件长度
|
||||
thresh_event_length = 8
|
||||
|
||||
#
|
||||
event_thresh = 1
|
||||
|
||||
severity_path = Path(r"/home/marques/code/marques/apnea/dataset/loc_first_csa.xlsx")
|
||||
severity_label = {"all": "none"}
|
||||
severity_df = pd.read_excel(severity_path)
|
||||
for one_data in severity_df.index:
|
||||
one_data = severity_df.loc[one_data]
|
||||
severity_label[str(one_data["数据编号"])] = one_data["程度"]
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
gpu = torch.cuda.is_available()
|
||||
|
||||
num_classes = 1
|
||||
calc_metrics = CALC_METRICS(num_classes)
|
||||
|
||||
with open("./settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
|
||||
data_path = hyp["Path"]["dataset"]
|
||||
read_dataset(data_path, augment=my_augment)
|
||||
del hyp
|
||||
|
||||
# 默认取最新的文件夹
|
||||
all_output_path, output_path, segments_results_save_path, events_results_save_path, = [None, ] * 4
|
||||
my_augment, model_path, label_path, data_path, model, model_name = [None, ] * 6
|
||||
train_set, test_set = None, None
|
||||
loss_func = nn.CrossEntropyLoss()
|
||||
|
||||
columns = ["sampNo", "segmentNo", "label_type", "new_label", "SP", "EP", "pred"]
|
||||
columns2 = ["sampNo", "severity", "origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN",
|
||||
"acc", "recall", "spec", "pre", "NPV", "F1score", "support"]
|
||||
|
||||
|
||||
def set_environment(i):
|
||||
global output_path, segments_results_save_path, events_results_save_path, model_path, label_path, data_path, \
|
||||
model, model_name, train_set, test_set
|
||||
|
||||
output_path = all_output_path[i]
|
||||
print(output_path)
|
||||
segments_results_save_path = (output_path / "segments_results")
|
||||
segments_results_save_path.mkdir(exist_ok=True)
|
||||
events_results_save_path = (output_path / "events_results")
|
||||
events_results_save_path.mkdir(exist_ok=True)
|
||||
|
||||
# 加载配置
|
||||
with open(output_path / "settings.yaml") as f:
|
||||
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
|
||||
data_path = hyp["Path"]["dataset"]
|
||||
label_path = hyp["Path"]["label"]
|
||||
train_set = hyp["train_set"]
|
||||
test_set = hyp["test_set"]
|
||||
|
||||
model_path = output_path / "weights" / "best.pt"
|
||||
model = eval(hyp["model_name"])()
|
||||
model_name = hyp["model_name"]
|
||||
model.load_state_dict(torch.load(model_path))
|
||||
model.cuda()
|
||||
model.eval()
|
||||
|
||||
|
||||
def test_and_analysis_and_visual(dataset_type):
|
||||
if dataset_type == "test":
|
||||
sampNo = train_set
|
||||
elif dataset_type == "all_test":
|
||||
sampNo = test_set
|
||||
test_dataset = TestApneaDataset2(data_path, label_path, select_sampno=sampNo, dataset_type=dataset_type,
|
||||
segment_augment=my_segment_augment)
|
||||
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True, num_workers=0)
|
||||
|
||||
test_loss = 0.0
|
||||
|
||||
df_segment = pd.DataFrame(columns=columns)
|
||||
|
||||
for one in tqdm(test_loader, total=len(test_loader)):
|
||||
resp, stft, labels = one[:3]
|
||||
other_info = one[3:]
|
||||
resp = resp.float().cuda() if gpu else resp.float()
|
||||
stft = stft.float().cuda() if gpu else stft.float()
|
||||
labels = labels.cuda() if gpu else labels
|
||||
with torch.no_grad():
|
||||
out = model(resp, stft)
|
||||
|
||||
loss = loss_func(out, labels)
|
||||
|
||||
test_loss += loss.item()
|
||||
|
||||
labels = torch.unsqueeze(labels, dim=1)
|
||||
out = F.softmax(out, dim=1)
|
||||
out = torch.unsqueeze(out[:, 1], dim=1)
|
||||
|
||||
calc_metrics.update(out.cpu(), labels.cpu())
|
||||
# one[0] = list(one[0].cpu().numpy())
|
||||
# one[1] = list(one[1].cpu().numpy())
|
||||
# one = one[1:]
|
||||
# out = out.view(1, -1).cpu().numpy().tolist()
|
||||
# one += out
|
||||
# result_record += [i for i in list(np.array(one, dtype=object).transpose(1, 0))]
|
||||
|
||||
one2 = np.array([i.cpu().numpy() for i in (other_info + [out.squeeze()])])
|
||||
one2 = one2.transpose((1, 0))
|
||||
df = pd.DataFrame(data=one2, columns=columns)
|
||||
df_segment = df_segment.append(df, ignore_index=True)
|
||||
|
||||
test_loss /= len(test_loader)
|
||||
calc_metrics.compute()
|
||||
print(calc_metrics.get_matrix(loss=test_loss, epoch=0, epoch_type="test"))
|
||||
calc_metrics.reset()
|
||||
|
||||
df_segment["thresh_label"] = 1 * (df_segment["label_type"] > event_thresh).copy()
|
||||
df_segment["thresh_Pred"] = 1 * (df_segment["pred"] > thresh).copy()
|
||||
df_segment["pred"] = df_segment["pred"].copy().apply(lambda x: round(x, 3))
|
||||
|
||||
# 片段级分析
|
||||
df_segment_metrics = analysis_results(df_segment, segments_results_save_path, dataset_type)
|
||||
|
||||
# 绘制混淆矩阵
|
||||
# 每个样本都绘制一份
|
||||
confusionMatrix(df_segment_metrics, segments_results_save_path, dataset_type)
|
||||
# 绘制柱状图
|
||||
|
||||
# 事件级分析
|
||||
# 对于inner_test 每个编号就是一个事件
|
||||
# 而对于整晚的independence_test,需要另行计算
|
||||
df_all_event = segment_to_event(df_segment, dataset_type)
|
||||
df_event_metrics = analysis_results(df_all_event, events_results_save_path, dataset_type, is_event=True)
|
||||
confusionMatrix(df_event_metrics, events_results_save_path, dataset_type)
|
||||
|
||||
# 剔除质量不好的样本
|
||||
df_bad_segment = df_segment[
|
||||
(df_segment["label_type"].isin([2, 3])) & (df_segment["new_label"] == 2)]
|
||||
df_select_segment = df_segment.drop(df_bad_segment.index)
|
||||
df_select_segment_metrics = analysis_results(df_select_segment, segments_results_save_path / "remove_2",
|
||||
dataset_type)
|
||||
df_select_event = segment_to_event(df_select_segment, dataset_type)
|
||||
df_event_metrics = analysis_results(df_select_event, events_results_save_path / "remove_2", dataset_type,
|
||||
is_event=True)
|
||||
|
||||
|
||||
def analysis_results(df_result, base_path, dataset_type, is_event=False):
|
||||
if df_result.empty:
|
||||
print(base_path, dataset_type, "is_empty")
|
||||
return None
|
||||
|
||||
(base_path / dataset_type).mkdir(exist_ok=True, parents=True)
|
||||
|
||||
all_sampNo = df_result["sampNo"].unique()
|
||||
df_metrics = pd.DataFrame(columns=columns2)
|
||||
df_metrics.loc[0] = 0
|
||||
df_metrics.loc[0]["sampNo"] = dataset_type
|
||||
|
||||
for index, sampNo in enumerate(all_sampNo):
|
||||
df = df_result[df_result["sampNo"] == sampNo]
|
||||
df.to_csv(
|
||||
base_path / dataset_type /
|
||||
f"{int(sampNo)}_{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_result.csv",
|
||||
index=False)
|
||||
|
||||
df_metrics.loc[index + 1] = np.NAN
|
||||
df_metrics.loc[index + 1]["sampNo"] = str(int(sampNo))
|
||||
df_metrics.loc[index + 1]["support"] = df.shape[0]
|
||||
df_metrics.loc[index + 1]["severity"] = severity_label[str(int(sampNo))]
|
||||
|
||||
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
|
||||
# continue
|
||||
# else:
|
||||
df_metrics.loc[index + 1]["origin_P"] = df[df["thresh_label"] == 1].shape[0]
|
||||
df_metrics.loc[index + 1]["origin_N"] = df[df["thresh_label"] == 0].shape[0]
|
||||
df_metrics.loc[index + 1]["pred_P"] = df[df["thresh_Pred"] == 1].shape[0]
|
||||
df_metrics.loc[index + 1]["pred_N"] = df[df["thresh_Pred"] == 0].shape[0]
|
||||
df_metrics.loc[index + 1]["T"] = df[df["thresh_Pred"] == df["thresh_label"]].shape[0]
|
||||
df_metrics.loc[index + 1]["F"] = df[df["thresh_Pred"] != df["thresh_label"]].shape[0]
|
||||
df_metrics.loc[index + 1]["TP"] = \
|
||||
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
|
||||
df_metrics.loc[index + 1]["FP"] = \
|
||||
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 1)].shape[0]
|
||||
df_metrics.loc[index + 1]["TN"] = \
|
||||
df[(df["thresh_Pred"] == df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
|
||||
df_metrics.loc[index + 1]["FN"] = \
|
||||
df[(df["thresh_Pred"] != df["thresh_label"]) & (df["thresh_Pred"] == 0)].shape[0]
|
||||
|
||||
df_metrics.loc[0]["origin_P"] += df_metrics.loc[index + 1]["origin_P"]
|
||||
df_metrics.loc[0]["origin_N"] += df_metrics.loc[index + 1]["origin_N"]
|
||||
df_metrics.loc[0]["pred_P"] += df_metrics.loc[index + 1]["pred_P"]
|
||||
df_metrics.loc[0]["pred_N"] += df_metrics.loc[index + 1]["pred_N"]
|
||||
df_metrics.loc[0]["T"] += df_metrics.loc[index + 1]["T"]
|
||||
df_metrics.loc[0]["F"] += df_metrics.loc[index + 1]["F"]
|
||||
df_metrics.loc[0]["TP"] += df_metrics.loc[index + 1]["TP"]
|
||||
df_metrics.loc[0]["FP"] += df_metrics.loc[index + 1]["FP"]
|
||||
df_metrics.loc[0]["TN"] += df_metrics.loc[index + 1]["TN"]
|
||||
df_metrics.loc[0]["FN"] += df_metrics.loc[index + 1]["FN"]
|
||||
df_metrics.loc[0]["support"] += df_metrics.loc[index + 1]["support"]
|
||||
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
|
||||
df_metrics.loc[index + 1][col] = df_metrics.loc[index + 1][col] if df_metrics.loc[index + 1][
|
||||
col] != 0 else np.NAN
|
||||
|
||||
df_metrics.loc[index + 1]["acc"] = df_metrics.iloc[index + 1]["T"] / df_metrics.iloc[index + 1]["support"]
|
||||
df_metrics.loc[index + 1]["recall"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["origin_P"]
|
||||
df_metrics.loc[index + 1]["spec"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["origin_N"]
|
||||
df_metrics.loc[index + 1]["pre"] = df_metrics.iloc[index + 1]["TP"] / df_metrics.iloc[index + 1]["pred_P"]
|
||||
df_metrics.loc[index + 1]["NPV"] = df_metrics.iloc[index + 1]["TN"] / df_metrics.iloc[index + 1]["pred_N"]
|
||||
df_metrics.loc[index + 1]["F1score"] = 2 * df_metrics.iloc[index + 1]["recall"] * df_metrics.iloc[index + 1][
|
||||
"pre"] / (df_metrics.iloc[index + 1]["recall"] + df_metrics.iloc[index + 1]["pre"])
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
|
||||
"spec", "pre", "NPV", "F1score"]:
|
||||
df_metrics.loc[index + 1][col] = 0 if pd.isna(df_metrics.loc[index + 1][col]) else \
|
||||
df_metrics.loc[index + 1][col]
|
||||
df_metrics.loc[index + 1][col] = round(df_metrics.loc[index + 1][col], 3)
|
||||
|
||||
# if dataset_type == "independence_test" or dataset_type == "train_all_test":
|
||||
# return None
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
|
||||
df_metrics.loc[0][col] = df_metrics.loc[0][col] if df_metrics.loc[0][col] != 0 else np.NAN
|
||||
|
||||
df_metrics.loc[0]["acc"] = df_metrics.iloc[0]["T"] / df_metrics.iloc[0]["support"]
|
||||
df_metrics.loc[0]["recall"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["origin_P"]
|
||||
df_metrics.loc[0]["spec"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["origin_N"]
|
||||
df_metrics.loc[0]["pre"] = df_metrics.iloc[0]["TP"] / df_metrics.iloc[0]["pred_P"]
|
||||
df_metrics.loc[0]["NPV"] = df_metrics.iloc[0]["TN"] / df_metrics.iloc[0]["pred_N"]
|
||||
df_metrics.loc[0]["F1score"] = 2 * df_metrics.iloc[0]["recall"] * df_metrics.iloc[0]["pre"] / (
|
||||
df_metrics.iloc[0]["recall"] + df_metrics.iloc[0]["pre"])
|
||||
for col in ["TP", "TN", "FP", "FN", "acc", "recall", "spec", "pre", "NPV", "F1score"]:
|
||||
df_metrics.loc[0][col] = 0 if pd.isna(df_metrics.loc[0][col]) else df_metrics.loc[0][col]
|
||||
df_metrics.loc[0][col] = round(df_metrics.loc[0][col], 3)
|
||||
|
||||
# 在inner_test中根据 分严重程度绘制
|
||||
if dataset_type == "test":
|
||||
all_severity = ["正常", "轻度", "中度", "重度"]
|
||||
for index, severity in enumerate(all_severity):
|
||||
df_event = df_metrics[df_metrics["severity"] == severity]
|
||||
df_temp = pd.DataFrame(columns=columns2)
|
||||
df_temp.loc[0] = 0
|
||||
df_temp.loc[0]["sampNo"] = severity
|
||||
df_temp.loc[0]["severity"] = str(index + 1)
|
||||
|
||||
df_temp.loc[0]["origin_P"] += df_event["origin_P"].sum()
|
||||
df_temp.loc[0]["origin_N"] += df_event["origin_N"].sum()
|
||||
df_temp.loc[0]["pred_P"] += df_event["pred_P"].sum()
|
||||
df_temp.loc[0]["pred_N"] += df_event["pred_N"].sum()
|
||||
df_temp.loc[0]["T"] += df_event["T"].sum()
|
||||
df_temp.loc[0]["F"] += df_event["F"].sum()
|
||||
df_temp.loc[0]["TP"] += df_event["TP"].sum()
|
||||
df_temp.loc[0]["FP"] += df_event["FP"].sum()
|
||||
df_temp.loc[0]["TN"] += df_event["TN"].sum()
|
||||
df_temp.loc[0]["FN"] += df_event["FN"].sum()
|
||||
df_temp.loc[0]["support"] += df_event["support"].sum()
|
||||
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN"]:
|
||||
df_temp.loc[0][col] = df_temp.loc[0][col] if df_temp.loc[0][col] != 0 else np.NAN
|
||||
|
||||
df_temp.loc[0]["acc"] = df_temp.iloc[0]["T"] / df_temp.iloc[0]["support"]
|
||||
df_temp.loc[0]["recall"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["origin_P"]
|
||||
df_temp.loc[0]["spec"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["origin_N"]
|
||||
df_temp.loc[0]["pre"] = df_temp.iloc[0]["TP"] / df_temp.iloc[0]["pred_P"]
|
||||
df_temp.loc[0]["NPV"] = df_temp.iloc[0]["TN"] / df_temp.iloc[0]["pred_N"]
|
||||
df_temp.loc[0]["F1score"] = 2 * df_temp.iloc[0]["recall"] * df_temp.iloc[0]["pre"] / (
|
||||
df_temp.iloc[0]["recall"] + df_temp.iloc[0]["pre"])
|
||||
|
||||
for col in ["origin_P", "origin_N", "pred_P", "pred_N", "T", "F", "TP", "TN", "FP", "FN", "acc", "recall",
|
||||
"spec", "pre", "NPV", "F1score"]:
|
||||
df_temp.loc[0][col] = 0 if pd.isna(df_temp.loc[0][col]) else df_temp.loc[0][col]
|
||||
df_temp.loc[0][col] = round(df_temp.loc[0][col], 3)
|
||||
|
||||
df_metrics = df_metrics.append(df_temp, ignore_index=True)
|
||||
|
||||
df_backup = df_metrics
|
||||
df_metrics = df_metrics.astype("str")
|
||||
df_metrics = df_metrics.sort_values("severity")
|
||||
df_metrics.to_csv(base_path / dataset_type /
|
||||
f"{model_name}_{dataset_type}_{'segment' if not is_event else 'event'}_all_metrics.csv",
|
||||
index=False, encoding="gbk")
|
||||
|
||||
return df_backup
|
||||
|
||||
|
||||
def confusionMatrix(df_analysis, base_path, dataset_type):
|
||||
if df_analysis is None:
|
||||
print(base_path, dataset_type, "is None")
|
||||
return
|
||||
|
||||
if df_analysis.empty:
|
||||
print(base_path, dataset_type, "is_empty")
|
||||
return
|
||||
classes = ["normal", "SA"]
|
||||
(base_path / dataset_type / "confusionMatrix").mkdir(exist_ok=True, parents=True)
|
||||
for one_samp in df_analysis.index:
|
||||
one_samp = df_analysis.loc[one_samp]
|
||||
cm = np.array([[one_samp["TN"], one_samp["FP"]], [one_samp["FN"], one_samp["TP"]]])
|
||||
draw_confusionMatrix(cm, classes=classes, title=str(one_samp["severity"]) + " " + one_samp["sampNo"],
|
||||
save_path=base_path / dataset_type / "confusionMatrix" / f"{one_samp['sampNo']}.jpg")
|
||||
|
||||
|
||||
def segment_to_event(df_segment, dataset_type):
|
||||
df_all_event = pd.DataFrame(columns=columns)
|
||||
all_sampNo = df_segment["sampNo"].unique()
|
||||
|
||||
if dataset_type == "test":
|
||||
for index, sampNo in enumerate(all_sampNo):
|
||||
df_event = pd.DataFrame(columns=columns)
|
||||
df = df_segment[df_segment["sampNo"] == sampNo].copy()
|
||||
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
|
||||
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
|
||||
all_segments_no = df["segmentNo"].unique()
|
||||
|
||||
for index_se, segment_No in enumerate(all_segments_no):
|
||||
df_temp = df[df["segmentNo"] == segment_No].copy()
|
||||
SP = df_temp.iloc[0]["EP"]
|
||||
EP = df_temp.iloc[-1]["EP"] + 1
|
||||
df_event.loc[index_se] = [int(sampNo), segment_No, df_temp.iloc[0]["label_type"],
|
||||
df_temp.iloc[0]["new_label"], SP, EP, 0]
|
||||
|
||||
thresh_Pred = df_temp["thresh_Pred"].values
|
||||
thresh_Pred2 = thresh_Pred.copy()
|
||||
|
||||
# 扩充
|
||||
for index_pred, pred in enumerate(thresh_Pred):
|
||||
if pred == 0:
|
||||
continue
|
||||
|
||||
for interval in range(1, thresh_event_interval):
|
||||
if pred == 1 and index_pred + interval < thresh_Pred.size:
|
||||
thresh_Pred2[index_pred + interval] = 1
|
||||
else:
|
||||
continue
|
||||
|
||||
# 判断
|
||||
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
|
||||
index_ar = np.where(same_ar)[0]
|
||||
count_ar = np.diff(index_ar)
|
||||
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
|
||||
for i in value_ar:
|
||||
if i > thresh_event_length:
|
||||
df_event.iloc[index_se]["pred"] = 1
|
||||
|
||||
# df_event.to_csv(events_results / dataset_type / f"{int(sampNo)}_event_results.csv", index=False,
|
||||
# encoding="gbk")
|
||||
df_all_event = df_all_event.append(df_event, ignore_index=True)
|
||||
else:
|
||||
for index, sampNo in enumerate(all_sampNo):
|
||||
df_event = pd.DataFrame(columns=columns)
|
||||
df = df_segment[df_segment["sampNo"] == sampNo].copy()
|
||||
df["thresh_label"] = 1 * (df["label_type"] > event_thresh)
|
||||
df["thresh_Pred"] = 1 * (df["pred"] > thresh)
|
||||
thresh_Pred = df["thresh_Pred"].values
|
||||
thresh_Pred2 = thresh_Pred.copy()
|
||||
# 扩充
|
||||
for index_pred, pred in enumerate(thresh_Pred):
|
||||
if pred == 0:
|
||||
continue
|
||||
|
||||
for interval in range(1, thresh_event_interval):
|
||||
if pred == 1 and index_pred + interval < thresh_Pred.size:
|
||||
thresh_Pred2[index_pred + interval] = 1
|
||||
else:
|
||||
continue
|
||||
|
||||
# 判断
|
||||
same_ar = np.concatenate(([True], thresh_Pred2[:-1] != thresh_Pred2[1:], [True]))
|
||||
index_ar = np.where(same_ar)[0]
|
||||
count_ar = np.diff(index_ar)
|
||||
value_ar = thresh_Pred2[same_ar[:-1]] * count_ar
|
||||
|
||||
for value_index, value in enumerate(value_ar):
|
||||
SP = index_ar[value_index]
|
||||
EP = index_ar[value_index] + count_ar[value_index]
|
||||
# TP, FP
|
||||
if value > thresh_event_length:
|
||||
|
||||
# label_type = 1 if thresh_Pred2[SP:EP].sum() > 0 else 0
|
||||
label_type = df["label_type"][SP:EP].max()
|
||||
new_label = df["new_label"][SP:EP].max()
|
||||
df_event = df_event.append(pd.DataFrame([[int(sampNo), SP // 30, label_type, new_label,
|
||||
SP, EP, thresh_Pred2[SP]]], columns=columns),
|
||||
ignore_index=True)
|
||||
if value > 30:
|
||||
print([int(sampNo), SP // 30, label_type, new_label, SP, EP, thresh_Pred2[SP]])
|
||||
# 长度不够
|
||||
else:
|
||||
df["thresh_Pred"][SP:EP] = 0
|
||||
|
||||
# 对负样本进行统计
|
||||
for segment_no in df["segmentNo"].unique():
|
||||
df_temp = df[df["segmentNo"] == segment_no]
|
||||
if df_temp["thresh_Pred"].sum() > 0:
|
||||
continue
|
||||
|
||||
df_event = df_event.append(pd.DataFrame(
|
||||
[[int(sampNo), segment_no, df_temp["label_type"].max(), df_temp["new_label"].max(), segment_no * 30,
|
||||
(segment_no + 1) * 30, 0]], columns=columns),
|
||||
ignore_index=True)
|
||||
|
||||
df_all_event = df_all_event.append(df_event, ignore_index=True)
|
||||
|
||||
df_temp = df_all_event.loc[:, ["label_type", "pred"]]
|
||||
df_all_event["thresh_label"] = 1 * (df_temp["label_type"] > event_thresh)
|
||||
df_all_event["thresh_Pred"] = 1 * (df_temp["pred"] > thresh)
|
||||
return df_all_event
|
||||
|
||||
|
||||
# 分sampNo保存结果,并不重合地可视化
|
||||
# inner_test
|
||||
|
||||
# 分sampNo将与标签不一致的另行保存,并不重合地可视化
|
||||
|
||||
# import shap
|
||||
# explainer = shap.TreeExplainer()
|
||||
# shap_values = explainer.shap_values()
|
||||
|
||||
if __name__ == '__main__':
|
||||
all_output_path = list(exam_path.rglob("KFold_*"))
|
||||
for exam_index, test_exam_path in enumerate(all_output_path):
|
||||
# test_exam_path = exam_path / test_exam_path
|
||||
set_environment(exam_index)
|
||||
test_and_analysis_and_visual(dataset_type="test")
|
||||
test_and_analysis_and_visual(dataset_type="all_test")
|
46
exam/005/utils/Draw_ConfusionMatrix.py
Normal file
46
exam/005/utils/Draw_ConfusionMatrix.py
Normal file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:Draw_ConfusionMatrix.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/08/10
|
||||
"""
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
|
||||
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
|
||||
|
||||
|
||||
def draw_confusionMatrix(cm, classes, title, save_path, cmap=plt.cm.Blues):
|
||||
fig_cm, ax = plt.subplots(figsize=(8, 8), dpi=120)
|
||||
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
|
||||
ax.figure.colorbar(im, ax=ax)
|
||||
ax.set(xticks=np.arange(cm.shape[1]),
|
||||
yticks=np.arange(cm.shape[0]),
|
||||
xticklabels=classes, yticklabels=classes,
|
||||
title=title,
|
||||
ylabel='True label',
|
||||
xlabel='Predicted label')
|
||||
ax.set_ylim(len(classes) - 0.5, -0.5)
|
||||
|
||||
# Rotate the tick labels and set their alignment.
|
||||
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
|
||||
normalize = False
|
||||
fmt = '.2f' if normalize else 'd'
|
||||
thresh = cm.max() * 0.8
|
||||
for i in range(cm.shape[0]):
|
||||
for j in range(cm.shape[1]):
|
||||
ax.text(j, i, format(cm[i, j], fmt),
|
||||
ha="center", va="center",
|
||||
color="white" if cm[i, j] > thresh else "black")
|
||||
fig_cm.tight_layout()
|
||||
fig_cm.savefig(save_path)
|
||||
plt.close()
|
||||
#
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
181
exam/005/utils/Preprocessing.py
Normal file
181
exam/005/utils/Preprocessing.py
Normal file
@ -0,0 +1,181 @@
|
||||
# encoding:utf-8
|
||||
|
||||
"""
|
||||
@ date: 2020-09-16
|
||||
@ author: jingxian
|
||||
@ illustration: Pre-processing
|
||||
"""
|
||||
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import pywt
|
||||
from scipy import signal
|
||||
from scipy import fftpack
|
||||
|
||||
|
||||
def Dilate(x, N, g, M):
|
||||
returndata = np.array([])
|
||||
for num in range(N - M + 1):
|
||||
returndata = np.append(returndata, np.min(np.array(x[num:num + M]) - np.array(g)))
|
||||
return returndata
|
||||
|
||||
|
||||
def Eorde(x, N, g, M):
|
||||
returndata = np.array([])
|
||||
for num in range(N - M + 1):
|
||||
returndata = np.append(returndata, np.max(np.array(x[num:num + M]) - np.array(g)))
|
||||
return returndata
|
||||
|
||||
|
||||
def fin_turn(data, peak):
|
||||
if len(data) == 0 or len(peak) == 0: return peak
|
||||
return_peak = []
|
||||
for p in peak:
|
||||
minx, maxx = max(0, p - 100), min(len(data), p + 100)
|
||||
return_peak.append(minx + np.argmax(data[minx: maxx]))
|
||||
return return_peak
|
||||
|
||||
|
||||
class BCG_Operation():
|
||||
def __init__(self, sample_rate=1000):
|
||||
self.sample_rate = sample_rate
|
||||
|
||||
def down_sample(self, data=None, down_radio=10):
|
||||
if data is None:
|
||||
raise ValueError("data is None, please given an real value!")
|
||||
data = data[:len(data) // down_radio * down_radio].reshape(-1, down_radio)[:, 0]
|
||||
self.sample_rate = self.sample_rate / down_radio
|
||||
return data
|
||||
|
||||
def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
|
||||
"""
|
||||
分窗
|
||||
:param len_win: length of window
|
||||
:return: signal windows
|
||||
"""
|
||||
if (len_win is None) or (data is None):
|
||||
raise ValueError("length of window or data is None, please given an real value!")
|
||||
else:
|
||||
length = len_win * self.sample_rate # number point of a window
|
||||
# step of split windows
|
||||
step = length * coverage
|
||||
start = 0
|
||||
Splitdata = []
|
||||
while (len(data) - start >= length):
|
||||
Splitdata.append(data[int(start):int(start + length)])
|
||||
start += step
|
||||
if calculate_to_end and (len(data) - start > 2000):
|
||||
remain = len(data) - start
|
||||
start = start - step
|
||||
step = int(remain / 2000)
|
||||
start = start + step * 2000
|
||||
Splitdata.append(data[int(start):int(start + length)])
|
||||
return np.array(Splitdata), step
|
||||
elif calculate_to_end:
|
||||
return np.array(Splitdata), 0
|
||||
else:
|
||||
return np.array(Splitdata)
|
||||
|
||||
def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
|
||||
"""
|
||||
:param type: Type of Butter. filter, lowpass, bandpass, ...
|
||||
:param lowcut: Low cutoff frequency
|
||||
:param highcut: High cutoff frequency
|
||||
:param order: Order of filter
|
||||
:return: Signal after filtering
|
||||
"""
|
||||
if type == "lowpass": # 低通滤波处理
|
||||
b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == "bandpass": # 带通滤波处理
|
||||
low = low_cut / (self.sample_rate * 0.5)
|
||||
high = high_cut / (self.sample_rate * 0.5)
|
||||
b, a = signal.butter(order, [low, high], btype='bandpass')
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == "highpass": # 高通滤波处理
|
||||
b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
else: # 警告,滤波器类型必须有
|
||||
raise ValueError("Please choose a type of fliter")
|
||||
|
||||
def MorphologicalFilter(self, data=None, M=200, get_bre=False):
|
||||
"""
|
||||
:param data: Input signal
|
||||
:param M: Length of structural element
|
||||
:return: Signal after filter
|
||||
"""
|
||||
if not data.any():
|
||||
raise ValueError("The input data is None, please given real value data")
|
||||
g = np.ones(M)
|
||||
Data_pre = np.insert(data, 0, np.zeros(M))
|
||||
Data_pre = np.insert(Data_pre, -1, np.zeros(M))
|
||||
# Opening: 腐蚀 + 膨胀
|
||||
out1 = Eorde(Data_pre, len(Data_pre), g, M)
|
||||
out2 = Dilate(out1, len(out1), g, M)
|
||||
out2 = np.insert(out2, 0, np.zeros(M - 2))
|
||||
# Closing: 膨胀 + 腐蚀
|
||||
out5 = Dilate(Data_pre, len(Data_pre), g, M)
|
||||
out6 = Eorde(out5, len(out5), g, M)
|
||||
out6 = np.insert(out6, 0, np.zeros(M - 2))
|
||||
|
||||
baseline = (out2 + out6) / 2
|
||||
# -------------------------保留剩余价值------------------------
|
||||
data_filtered = Data_pre[:len(baseline)] - baseline
|
||||
data_filtered = data_filtered[M: M + len(data)]
|
||||
baseline = baseline[M:]
|
||||
data_filtered[-1] = data_filtered[-2] = data_filtered[-3]
|
||||
baseline[-1] = baseline[-2] = baseline[-3]
|
||||
if get_bre:
|
||||
return data_filtered, baseline
|
||||
else:
|
||||
return data_filtered
|
||||
|
||||
def Iirnotch(self, data=None, cut_fre=50, quality=3):
|
||||
"""陷波器"""
|
||||
b, a = signal.iirnotch(cut_fre / (self.sample_rate * 0.5), quality)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
|
||||
def ChebyFilter(self, data, rp=1, type=None, low_cut=0, high_cut=0, order=10):
|
||||
"""
|
||||
切比雪夫滤波器
|
||||
:param data: Input signal
|
||||
:param rp: The maximum ripple allowed
|
||||
:param type: 'lowpass', 'bandpass, 'highpass'
|
||||
:param low_cut: Low cut-off fre
|
||||
:param high_cut: High cut-off fre
|
||||
:param order: The order of filter
|
||||
:return: Signal after filter
|
||||
"""
|
||||
if type == 'lowpass':
|
||||
b, a = signal.cheby1(order, rp, low_cut, btype='lowpass', fs=self.sample_rate)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == 'bandpass':
|
||||
b, a = signal.cheby1(order, rp, [low_cut, high_cut], btype='bandpass', fs=self.sample_rate)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
elif type == 'highpass':
|
||||
b, a = signal.cheby1(order, rp, high_cut, btype='highpass', fs=self.sample_rate)
|
||||
return signal.filtfilt(b, a, np.array(data))
|
||||
else:
|
||||
raise ValueError("The type of filter is None, please given the real value!")
|
||||
|
||||
def Envelope(self, data):
|
||||
"""取信号包络"""
|
||||
if len(data) <= 1: raise ValueError("Wrong input data")
|
||||
hx = fftpack.hilbert(data)
|
||||
return np.sqrt(hx ** 2, data ** 2)
|
||||
|
||||
def wavelet_trans(self, data,c_level=['aaa','aad'], wavelet='db4', mode='symmetric',maxlevel=10):
|
||||
wp = pywt.WaveletPacket(data=data, wavelet=wavelet, mode=mode, maxlevel=maxlevel)
|
||||
new_wp = pywt.WaveletPacket(data=None, wavelet=wavelet, mode=mode)
|
||||
for c in c_level :
|
||||
new_wp[c] = wp[c]
|
||||
return new_wp.reconstruct()
|
||||
|
||||
# def em_decomposition(self, data):
|
||||
# from pyhht.emd import EMD
|
||||
# return EMD(data).decompose()
|
||||
|
84
exam/005/utils/calc_metrics.py
Normal file
84
exam/005/utils/calc_metrics.py
Normal file
@ -0,0 +1,84 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@author:Marques
|
||||
@file:calc_metrics.py
|
||||
@email:admin@marques22.com
|
||||
@email:2021022362@m.scnu.edu.cn
|
||||
@time:2022/02/12
|
||||
"""
|
||||
import torch
|
||||
import torchmetrics
|
||||
|
||||
|
||||
class CALC_METRICS:
|
||||
metrics = []
|
||||
nc = 0
|
||||
|
||||
def __init__(self, nc):
|
||||
self.nc = nc
|
||||
self.metrics.append(torchmetrics.Accuracy(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.Recall(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.Precision(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.Specificity(average="none", num_classes=nc, multiclass=False))
|
||||
self.metrics.append(torchmetrics.F1Score(average="none", num_classes=nc, multiclass=False))
|
||||
self.valid_result = self.train_result = None
|
||||
|
||||
def update(self, pred, target):
|
||||
for part1 in self.metrics:
|
||||
part1.update(pred.cpu(), target.cpu())
|
||||
|
||||
def compute(self):
|
||||
result = []
|
||||
for part1 in self.metrics:
|
||||
result.append(part1.compute())
|
||||
|
||||
def reset(self):
|
||||
for part1 in self.metrics:
|
||||
part1.reset()
|
||||
|
||||
def get_matrix(self, loss=None, cur_lr=None, epoch=None, epoch_type=None):
|
||||
temp_result = []
|
||||
for j in self.metrics:
|
||||
compute_result = (j.compute().cpu().numpy() * 100).tolist()
|
||||
temp_result.append(compute_result)
|
||||
|
||||
if epoch_type == "train":
|
||||
self.train_result = [loss] + temp_result
|
||||
elif epoch_type == "valid":
|
||||
self.valid_result = [loss] + temp_result
|
||||
else:
|
||||
pass
|
||||
|
||||
a = ""
|
||||
a += f"{epoch_type} epoch: {str(epoch)} loss: {str(loss)} lr: {str(cur_lr)} \n"
|
||||
a += " " * 8 + "Acc".center(8) + "Rec".center(8) + "Pre".center(8) + "Spe".center(8) + "F1".center(8) + "\n"
|
||||
a += "all".center(8) + "".join([str(round(float(i), 2)).center(8) for i in temp_result]) + "\n"
|
||||
return a
|
||||
|
||||
def wandb_log(self, wandb=None, cur_lr=None):
|
||||
if wandb is None:
|
||||
return
|
||||
|
||||
keyword = ["Accuracy", "Recall", "Precision", "Specificity", "F1Score"]
|
||||
dict_key = []
|
||||
for epoch_type in ["train", "valid"]:
|
||||
dict_key.append(epoch_type + "/" + "loss")
|
||||
for i in keyword:
|
||||
dict_key.append(epoch_type + "/" + i)
|
||||
|
||||
log_dict = dict(zip(dict_key, self.train_result + self.valid_result))
|
||||
log_dict["lr"] = cur_lr
|
||||
wandb.log(log_dict)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pred = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9], [1.0]]
|
||||
# true = [[0], [0], [1], [0], [0], [0], [0], [0], [0], [1]]
|
||||
pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
|
||||
true = [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
|
||||
pred = torch.tensor(pred).cuda()
|
||||
true = torch.tensor(true).cuda()
|
||||
calc_metrics = CALC_METRICS(1)
|
||||
calc_metrics.update(pred, true)
|
||||
print(calc_metrics.get_matrix())
|
@ -1,5 +1,5 @@
# environment config
GPU: "0"
GPU: "1"


# dataset config
@ -24,8 +24,8 @@ select_sampno:
- 1009

# train hyperparameters config
epoch: 20
lr: 0.0001
epoch: 50
lr: 0.00001
nc: 1

# wandb config
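For context, a minimal sketch of how a config like this is typically consumed on the training side. The file name config.yaml and the loading code are assumptions; only the keys shown in the hunks above (GPU, epoch, lr, nc) are taken from the diff:

import os
import yaml

# Hypothetical loader; the project may wire the config in differently.
with open("config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

os.environ["CUDA_VISIBLE_DEVICES"] = cfg["GPU"]   # "1" after this change
epochs = cfg["epoch"]                             # 50 after this change
lr = cfg["lr"]                                    # 1e-5 after this change
nc = cfg["nc"]                                    # number of output channels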
@ -4,13 +4,12 @@
# Initial learning-rate exploration
-------------------------------------------------
000
lr 1e-4, epoch 20: gets the job done
lr 1e-4, epoch 20: overfits too early (around epoch 6)

001
lr 1e-5, epoch 50: results close to 000, so the 000 settings were chosen

lr 1e-5, epoch 50: works
002
lr 1e-6, epoch 50: converges too slowly
lr 1e-6, epoch 50: quite suitable, just too slow

# Precision is the main factor driving F1
-------------------------------------------------
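In code, the three settings above differ only in the learning rate passed to the optimizer and the epoch budget. A sketch; Adam and the placeholder module are assumptions, since the notes do not name the optimizer:

import torch
import torch.nn as nn

model_net = nn.Linear(10, 2)   # placeholder for the real network

# 000: lr=1e-4, 20 epochs -> overfits around epoch 6
# 001: lr=1e-5, 50 epochs -> chosen setting
# 002: lr=1e-6, 50 epochs -> converges too slowly
optimizer = torch.optim.Adam(model_net.parameters(), lr=1e-5)
epochs = 50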
@ -20,7 +19,23 @@
Apart from subtracting the mean, no other preprocessing is applied to the dataset
--------------------------------------------------------
003
lr 1e-5: judging by the current results the whole-night dataset also works, but the Cheyne-Stokes (periodic breathing) data has not been added yet
lr 1e-5, epoch 50
--------------------------------------------------------

--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
apart from mean subtraction no other preprocessing; changed from pinning the positive-class weight at 1 to pinning the negative-class weight at 1 (sketched below)
--------------------------------------------------------
004
lr 1e-5, epoch 50
--------------------------------------------------------
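A minimal sketch of that weight flip with a weighted cross-entropy. The 1:3.5 ratio comes from these notes; the exact loss construction in the project is not shown here, so treat the details as an assumption (class 0 = normal, class 1 = apnea, matching out[:, 1] being used as the positive score):

import torch
import torch.nn as nn

ratio = 3.5  # roughly negative:positive = 3.5:1 in the segment dataset

# Before: positive-class weight pinned to 1, negatives down-weighted.
loss_before = nn.CrossEntropyLoss(weight=torch.tensor([1.0 / ratio, 1.0]))

# After (004): negative-class weight pinned to 1, positives up-weighted.
# The class ratio is unchanged; only the overall loss scale differs.
loss_after = nn.CrossEntropyLoss(weight=torch.tensor([1.0, ratio]))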

--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
apart from mean subtraction no other preprocessing; changed to a single output (sketched below)
--------------------------------------------------------
005
lr 1e-5, epoch 50
--------------------------------------------------------

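A sketch of the single-output variant: one logit plus a sigmoid instead of a two-class softmax. Pairing it with BCEWithLogitsLoss is an assumption; the note only says "single output":

import torch
import torch.nn as nn

logits = torch.randn(8, 1)                       # one score per segment instead of two class scores
labels = torch.randint(0, 2, (8, 1)).float()

criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([3.5]))  # keeps the 1:3.5 imbalance handling
loss = criterion(logits, labels)
probs = torch.sigmoid(logits)                    # what the metrics would consume as positive scores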
@ -31,7 +46,7 @@
Besides subtracting the mean from the dataset: clip at 600, then normalize (sketched below)
--------------------------------------------------------
011
lr 1e-5
lr 1e-5, epoch 50
--------------------------------------------------------
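A sketch of that preprocessing chain for a single respiration segment; whether the clip is symmetric at ±600 and which normalization is used are assumptions:

import numpy as np

def preprocess_segment(seg: np.ndarray, clip: float = 600.0) -> np.ndarray:
    seg = seg - seg.mean()                  # remove the DC offset
    seg = np.clip(seg, -clip, clip)         # cap large excursions (e.g. body-movement spikes)
    peak = np.abs(seg).max()
    return seg / peak if peak > 0 else seg  # scale into [-1, 1]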

--------------------------------------------------------
@ -39,7 +54,7 @@
Besides subtracting the mean from the dataset: Z-score the data using statistics of the whole superimposed signal
--------------------------------------------------------
012
lr 1e-4, EPOCH 20
lr 1e-5, epoch 50
--------------------------------------------------------


@ -48,34 +63,89 @@
Besides subtracting the mean from the dataset: Z-score each segment using its own superimposed-signal statistics (both variants sketched below)
--------------------------------------------------------
013
lr 1e-4, EPOCH 20
lr 1e-5, epoch 50
--------------------------------------------------------
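Experiments 012 and 013 differ only in where the Z-score statistics come from. A sketch of both variants, assuming segments is an array of shape (n_segments, segment_length) cut from the full-night signal:

import numpy as np

def zscore_whole_night(signal: np.ndarray, segments: np.ndarray) -> np.ndarray:
    # 012: one mean/std from the whole superimposed signal, applied to every segment
    mu, sigma = signal.mean(), signal.std()
    return (segments - mu) / (sigma + 1e-8)

def zscore_per_segment(segments: np.ndarray) -> np.ndarray:
    # 013: each segment standardized with its own mean and std
    mu = segments.mean(axis=1, keepdims=True)
    sigma = segments.std(axis=1, keepdims=True)
    return (segments - mu) / (sigma + 1e-8)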



--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, scale the data after gross body-movement detection
--------------------------------------------------------
014
lr 1e-4, EPOCH 20
lr 1e-5, EPOCH 20 TODO
--------------------------------------------------------

--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5, using only good-quality data; class_weight handles the imbalance;
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
# Change the activation function of the conv layers (sketched below)
--------------------------------------------------------
021
lr 1e-4, hold off on this one for now
lr 1e-5, EPOCH 20 TODO
--------------------------------------------------------
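Experiment 021 only swaps the activation used after the conv layers. A sketch of the kind of change meant here; neither the original nor the replacement activation is named in the notes, so ReLU to GELU is an assumption:

import torch.nn as nn

conv_block_before = nn.Sequential(
    nn.Conv1d(1, 16, kernel_size=7, padding=3),
    nn.BatchNorm1d(16),
    nn.ReLU(),
)

conv_block_after = nn.Sequential(
    nn.Conv1d(1, 16, kernel_size=7, padding=3),
    nn.BatchNorm1d(16),
    nn.GELU(),   # swapped activation; everything else unchanged
)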

--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
# Introduce residual blocks (sketched below)
--------------------------------------------------------
022
lr 1e-5, EPOCH 20 TODO
--------------------------------------------------------
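A minimal 1-D residual block of the kind experiment 022 would introduce; the channel count and kernel size are placeholders:

import torch
import torch.nn as nn

class ResBlock1d(nn.Module):
    def __init__(self, channels: int, kernel_size: int = 7):
        super().__init__()
        pad = kernel_size // 2
        self.body = nn.Sequential(
            nn.Conv1d(channels, channels, kernel_size, padding=pad),
            nn.BatchNorm1d(channels),
            nn.ReLU(),
            nn.Conv1d(channels, channels, kernel_size, padding=pad),
            nn.BatchNorm1d(channels),
        )
        self.act = nn.ReLU()

    def forward(self, x):
        return self.act(self.body(x) + x)   # skip connection around the conv stack

y = ResBlock1d(16)(torch.randn(4, 16, 300))  # output keeps the (batch, channels, samples) shape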

--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
# Change the conv activation; CNN (ResNet?) for RESP, LSTM for STFT
--------------------------------------------------------
023
lr 1e-5, EPOCH 20 TODO
--------------------------------------------------------

--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
# Change the conv activation; CNN (ResNet?) for RESP, LSTM for STFT, then another CNN on the two branch outputs (sketched below)
--------------------------------------------------------
024
lr 1e-5, EPOCH 20 TODO
--------------------------------------------------------
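Experiments 023 through 025 move toward a two-branch model: a CNN over the raw RESP segment and an LSTM over the STFT, with a further stage fusing the two branch outputs. A rough sketch under assumed input shapes; all sizes are placeholders, and the fusion stage is simplified to linear layers rather than the extra CNN the note mentions:

import torch
import torch.nn as nn

class RespStftNet(nn.Module):
    def __init__(self, n_freq_bins: int = 64, num_classes: int = 2):
        super().__init__()
        # RESP branch: 1-D CNN over the raw respiration segment
        self.resp_cnn = nn.Sequential(
            nn.Conv1d(1, 16, 7, padding=3), nn.BatchNorm1d(16), nn.ReLU(),
            nn.MaxPool1d(4),
            nn.Conv1d(16, 32, 7, padding=3), nn.BatchNorm1d(32), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        )
        # STFT branch: LSTM over the time axis of the spectrogram
        self.stft_lstm = nn.LSTM(input_size=n_freq_bins, hidden_size=32, batch_first=True)
        # Fusion head over the concatenated branch features
        self.head = nn.Sequential(nn.Linear(32 + 32, 32), nn.ReLU(), nn.Linear(32, num_classes))

    def forward(self, resp, stft):
        # resp: (batch, samples), stft: (batch, frames, freq_bins)
        r = self.resp_cnn(resp.unsqueeze(1)).squeeze(-1)   # (batch, 32)
        _, (h, _) = self.stft_lstm(stft)                   # h: (1, batch, 32)
        s = h[-1]                                          # (batch, 32)
        return self.head(torch.cat([r, s], dim=1))         # (batch, num_classes)

out = RespStftNet()(torch.randn(4, 1200), torch.randn(4, 30, 64))  # -> (4, 2)

The forward signature matches how the training code calls model(resp, stft).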


--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
# Change the conv activation; CNN (ResNet?) for RESP, LSTM for STFT, then another CNN on the two branch outputs
--------------------------------------------------------
025
lr 1e-5, EPOCH 20 TODO
--------------------------------------------------------

##
TODO: analyze the results on good-quality signals; analyze by severity

--------------------------------------------------------
**Full** non-whole-night dataset, trained at 1:3.5; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
--------------------------------------------------------
022
031
lr 1e-4
--------------------------------------------------------



--------------------------------------------------------
Non-whole-night dataset, trained at 1:3.5, using only good-quality data; class_weight handles the imbalance;
besides mean subtraction, use the best preprocessing found above
--------------------------------------------------------
0xx
lr 1e-4, hold off on this one for now
--------------------------------------------------------