#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:Marques
@file:generate_label_11.0.py
@email:admin@marques22.com
@email:2021022362@m.scnu.edu.cn
@time:2022/09/05
"""
# 14.0
# 手动均衡数量

# 13.0
# 限制选择部分数据集,先做测试

# 12.0
# 置不可用事件的片段为上限,不可用片段设置为背景,不记录事件

# 10.0
# 使用提出质量差的信号

# 9.0
# 增加 最新的质量标签 未使用

# 8.0
# 生成 除低通气所有事件标签

# 尝试过步进两秒 会造成不足两秒的数据被抛弃,造成较多误判,但是可以考虑囊括这部分
# 采用 30秒数据 移动 1秒 将所有呼吸暂停标注为1 低通气为0 正常为0

# 预处理操作 为 50Hz陷波滤波器去工频 外加 20Hz的低通滤波器 这个20Hz要看BCG信号的频谱范围

# 先提剔除极端值
# 数值大于最高基准线或最低基准线
# type1: average:1800 low:1200 high:2400
# type2: average:2400 low:1800 high:3000

# 过多片段会造成平均值偏移
# TODO
# 加入体动标签,计算除体动外的平均值

# 最后降采为100hz
import time
import logging
import numpy as np
import pandas as pd
from pathlib import Path

from datetime import datetime

import yaml
from pathos import multiprocessing
from tqdm import tqdm

# Dataset (.npy signals) and label (.csv annotations) locations
bcg_numpy_data_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_100hz_lowpass50/")
bcg_label_path = Path(r"/home/marques/code/marques/apnea/dataset/BCG_label_0616/")

# BCG recording start times: CSV rows of (sampNo, start-time) -> dict lookup
bcg_start_time = np.loadtxt(Path(r"/home/marques/code/marques/apnea/dataset/start_time.csv"), delimiter=', ',
                            dtype=object)
bcg_start_time = dict(zip(bcg_start_time[:, 0], bcg_start_time[:, 1]))

# Collect every recording path, sorted for a deterministic processing order
all_numpy_dataset = list(bcg_numpy_data_path.rglob("*.npy"))
all_numpy_dataset.sort()

# Output directory for the generated label splits
# dataset_save_path = Path(r"/home/marques/code/marques/apnea/dataset/dataset/dataset0623_300_30_30/")
dataset_save_path = Path(r"./dataset/")
dataset_save_path.mkdir(exist_ok=True)

# Logging: bare-message format, duplicated to a timestamped file and the console
logger = logging.getLogger()
logger.setLevel(logging.NOTSET)
realtime = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
fh = logging.FileHandler(dataset_save_path / (realtime + ".log"), mode='w')
fh.setLevel(logging.NOTSET)
# fh.setFormatter(logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"))
fh.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(fh)

ch = logging.StreamHandler()
ch.setLevel(logging.NOTSET)
ch.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(ch)
# all_label = []
# Write the summary-table header line to the log (comma-separated, padded columns)
logger.info("sampNo".center(8) + ',' +
            "train_num".center(10) + ',' + "train_P".center(10) + ',' + "train_N".center(10) + ',' +
            "valid_num".center(10) + ',' + "valid_P".center(10) + ',' + "valid_N".center(10) + ',' +
            "test_num".center(10) + ',' + "test_P".center(10) + ',' + "test_N".center(10) + ',' +
            "train_eve".center(10) + ',' + "valid_eve".center(10) + ',' + "test_eve".center(10)
            )

# Base random seed; each recording uses base_random_seed + int(sampNo)
base_random_seed = 42

# Sliding-window length and stride, in seconds
window_second = 30
step_second = 1

# Validation / test split sizes: fractions when in (0, 1), absolute counts when > 1
valid_ratio = 0.1
test_ratio = 0.1

# Quality label assigned to background (non-event) windows
normal_event_quality_label = 0
# valid_ratio = 5000
# test_ratio = 10000

# Both sizes must be fractions, or both absolute counts.
# BUGFIX: the second alternative previously checked ``valid_ratio > 1`` twice,
# so an absolute valid count paired with a fractional test count slipped through;
# it now checks ``test_ratio`` as intended.
assert ((valid_ratio + test_ratio) < 1 and 0 < valid_ratio < 1 and 0 < test_ratio < 1) or (
        valid_ratio > 1 and test_ratio > 1), "验证集与测试集输入应同时为比例或数量"

# Sample numbers selected for the full test export (dataset sampNo for test)
with open("./settings.yaml") as f:
    hyp = yaml.load(f, Loader=yaml.SafeLoader)
select_dataset = hyp["select_sampno"]
# Sample-index ranges (at 100 Hz) to force to zero, keyed by sample number.
# The unusable prefix of a recording is not necessarily zero-padded already --
# it may contain raw values in the tens of thousands, so it must be cleared
# explicitly before the leading-zero detection runs.
disable_segment = {
    '221': [[0, 10000]],
    '670': [[0, 20000]],
    '683': [[0, 20000]],
    '704': [[0, 26000]],
    '726': [[0, 20000]],
    '736': [[0, 47000]],
    '933': [[0, 773560]],
    '935': [[0, 26600]],
    '952': [[0, 17000]],
    '955': [[0, 78000]],
    '961': [[0, 107000]],
    '962': [[0, 15100]],
    '966': [[0, 13120]],
    '967': [[0, 44000]],
    '1006': [[0, 60000]],
    '1009': [[0, 1000]],
    '1010': [[0, 49000]],
    '1296': [[0, 27000]],
    '1300': [[0, 33800]],
    '1301': [[0, 14000]],
    '1302': [[0, 5600]],
    '1374': [[0, 1000]],
    '1478': [[0, 998000]],

}
# Main dataset-generation routine: one call per recording.
def generate_label(No, dataset_path):
    """Cut one BCG recording into sliding windows and write label CSVs.

    :param No: row index used when appending this recording's summary row to
               the module-level ``df2`` DataFrame (created in ``__main__``)
    :param dataset_path: path to the recording's .npy signal file (100 Hz)
    :return: None; writes train/valid/test (and, for selected recordings, an
             "all") label CSV into ``dataset_save_path`` and logs statistics
    :raises FileNotFoundError: when the matching label CSV does not exist
    :raises Exception: when ``dataset_path`` itself does not exist
    """

    # Sample number, e.g. "670" from "670samp...npy"
    sampNo = dataset_path.stem.split("samp")[0]
    # Matching annotation file
    label_path = bcg_label_path / f"export{sampNo}_all.csv"

    if not label_path.exists():
        raise FileNotFoundError(f"{label_path} not exist")

    if not dataset_path.exists():
        raise Exception(f"{dataset_path} not exists")

    # Load the raw signal (100 Hz samples)
    select_numpy = np.load(dataset_path)
    select_numpy_len = len(select_numpy)

    # Force known-unusable leading segments to zero
    if sampNo in disable_segment.keys():
        for sp, ep in disable_segment[sampNo]:
            select_numpy[sp:ep] = 0

    # Recording 670: drop the poor-quality tail after 17195 s
    if sampNo == "670":
        select_numpy = select_numpy[:17195 * 100]

    # Measure the zero-padded prefix: count leading samples <= 10
    not_zero_point = 0
    for num in select_numpy:
        if num > 10:
            break
        not_zero_point += 1
    not_zero_point //= 100  # samples -> seconds at 100 Hz

    # Read the event annotations (GBK-encoded export)
    label_csv = pd.read_csv(label_path, encoding='gbk')
    label_csv["new_start"] = label_csv["new_start"].astype("int")
    label_csv["new_end"] = label_csv["new_end"].astype("int")
    label_csv["Duration"] = label_csv["Duration"].astype("int")
    # Missing quality labels default to "2"
    label_csv["new_label"] = label_csv["new_label"].fillna("2")
    label_csv["new_label"] = label_csv["new_label"].astype("int")

    # Event segments and background segments; each background spans one window
    segment_labels = []
    negative_labels = []
    # Per-type counters, kept for optional statistics logging
    hpy_num = csa_num = osa_num = msa_num = 0
    hpy_time = csa_time = osa_time = msa_time = 0

    # Walk every labelled event, label-encode it and collect statistics
    for i in range(len(label_csv)):
        label = label_csv.iloc[i, :]
        # Skip events that lie entirely inside the zero-padded prefix
        if label["new_end"] < not_zero_point:
            continue

        # Skip events beyond the truncated tail of recording 670
        if sampNo == "670" and label["new_start"] > 17195:
            continue

        # Skip events shorter than 10 s
        if label["new_end"] - label["new_start"] < 10:
            continue

        if label["Event type"] == "Hypopnea":
            label_type = 1
            hpy_num += 1
            hpy_time += label["new_end"] - label["new_start"]
            # Hypopnea is counted but treated as background, not a positive event
            continue
        elif label["Event type"] == "Central apnea":
            label_type = 2
            csa_num += 1
            csa_time += label["new_end"] - label["new_start"]
        elif label["Event type"] == "Obstructive apnea":
            label_type = 3
            osa_num += 1
            osa_time += label["new_end"] - label["new_start"]
        # MSA is treated as OSA
        elif label["Event type"] == "Mixed apnea":
            label_type = 3
            msa_num += 1
            msa_time += label["new_end"] - label["new_start"]
        else:
            continue

        # Flag events much longer than their recorded duration
        if label["new_end"] - label["new_start"] > label["Duration"] + 20:
            print(sampNo, label)

        # Row format: sampNo, event index, type, quality label, start, end (seconds)
        segment_labels.append([sampNo, i, label_type, label["new_label"], label["new_start"], label["new_end"]])

    # Per-recording seed for reproducible shuffling
    random_seed = base_random_seed + int(sampNo)

    # Recording without any positive events: every window becomes background
    if len(segment_labels) == 0:
        # Start after the zero-padded prefix...
        normal_SP = not_zero_point
        # ...and no earlier than one full window
        if normal_SP < window_second:
            normal_SP = window_second

        # End of recording, in seconds
        normal_EP = select_numpy_len // 100
        label_type = 0
        # Background index is 10000 + 30-s epoch number
        negative_labels += [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1,
                             SP1 + window_second] for SP1 in
                            range(normal_SP - window_second + step_second, normal_EP - window_second + step_second,
                                  window_second)]

    # Recording with events: background lies between consecutive events
    for index in range(len(segment_labels) + 1):
        # Start: beginning of recording, or 1 s (buffer) after the previous event
        if index == 0:
            normal_SP = 0
        else:
            normal_SP = segment_labels[index - 1][-1] + 1

        # End: end of recording, or 1 s (buffer) before the next event
        if index == len(segment_labels):
            normal_EP = select_numpy_len // 100 - window_second
        else:
            normal_EP = segment_labels[index][-2] - 1

        # Skip stretches that end inside the zero-padded prefix
        if normal_EP < not_zero_point:
            continue

        # Skip a leading stretch shorter than one window
        if normal_SP < window_second:
            continue
        label_type = 0

        # Slice the background stretch into non-overlapping windows
        temp_1 = [[sampNo, 10000 + normal_SP // 30, label_type, normal_event_quality_label, SP1, SP1 + window_second]
                  for SP1 in range(normal_SP - window_second, normal_EP - window_second, window_second)]

        negative_labels += temp_1

    train_label, valid_label, test_label = [], [], []

    # Recordings selected for testing: export every 1-s-step window directly
    if int(sampNo) in select_dataset:
        # Per-second tracks of event type and quality over the whole recording
        event_label = np.zeros(select_numpy_len // 100)
        quality_label = np.zeros(select_numpy_len // 100)
        for PN, segmentNo, label_type, new_label, SP, EP in segment_labels:
            event_label[SP:EP] = label_type

        test_label = []
        # Skip the zero-padded prefix, but start no earlier than one window
        normal_SP = not_zero_point
        if normal_SP < window_second:
            normal_SP = window_second
        normal_EP = select_numpy_len // 100

        # Window label = event type / quality at the window's last second
        test_label += [
            [sampNo, SP1 // 30, int(event_label[SP1 + window_second - step_second]),
             int(quality_label[SP1 + window_second - step_second]),
             SP1, SP1 + window_second] for SP1 in range(normal_SP - window_second + step_second,
                                                        normal_EP - window_second + step_second, step_second)]

        logger.info(sampNo.center(8) + ',' +
                    str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
                    str(0).center(10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' +
                    str(len(test_label)).center(10) + ',' +
                    str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(10) +
                    ',' + str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
                        10) + ',' + str(0).center(10) + ',' + str(0).center(10) + ',' + str(len(segment_labels)).center(10)
                    )

        df2.loc[No] = [sampNo,
                       str(0), str(0), str(0),
                       str(0), str(0), str(0),
                       str(len(test_label)),
                       str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
                       str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
                       str(0), str(0), str(len(segment_labels))]

        df1 = pd.DataFrame(data=test_label,
                           columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
        df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_all_label.csv",
                   index=False)

    train_label, valid_label, test_label = [], [], []
    # Train / validation / test split for every recording
    if True:
        # Shuffle events and backgrounds reproducibly
        np.random.seed(random_seed)
        np.random.shuffle(segment_labels)
        np.random.shuffle(negative_labels)

        # Number of events per split (fractions or absolute counts)
        if 0 < valid_ratio < 1:
            train_segment_num = int(len(segment_labels) * (1 - valid_ratio - test_ratio))
            valid_segment_num = int(len(segment_labels) * (1 - test_ratio))
        else:
            train_segment_num = len(segment_labels) - valid_ratio - test_ratio
            valid_segment_num = valid_ratio

        # Expand each event into window_second windows stepped by step_second
        for index in range(train_segment_num):
            PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
            train_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
                            range(SP - window_second, EP - window_second + step_second, step_second)]

        for index in range(train_segment_num, valid_segment_num):
            PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
            valid_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
                            range(SP - window_second, EP - window_second + step_second, step_second)]

        for index in range(valid_segment_num, len(segment_labels)):
            PN, segmentNo, label_type, new_label, SP, EP = segment_labels[index]
            # BUGFIX: the stop and step arguments were fused into a product
            # ("EP - window_second + step_second * step_second"), which only
            # behaved correctly because step_second == 1; use the same range
            # as the train/valid loops above.
            test_label += [[PN, segmentNo, label_type, new_label, SP1, SP1 + window_second] for SP1 in
                           range(SP - window_second, EP - window_second + step_second, step_second)]

        # Window and event counts per split
        train_num, valid_num, test_num = len(train_label), len(valid_label), len(test_label)
        train_eve, valid_eve, test_eve = train_segment_num, (valid_segment_num - train_segment_num), (
                len(segment_labels) - valid_segment_num)

        # Top up each split with background segments to a minimum count
        train_eve2 = max(train_eve, 300)
        valid_eve2 = max(valid_eve, 40)
        test_eve2 = max(test_eve, 40)

        # NOTE(review): these loops rebind ``sampNo`` with each row's first
        # column; the value is identical to the outer one, so it is harmless.
        for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[:train_eve2]:
            label_type = int(label_type)
            train_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second]
                            for SP1 in range(normal_SP, normal_EP, step_second)]

        for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
                                                                          train_eve2: train_eve2 + valid_eve2]:
            label_type = int(label_type)
            valid_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second]
                            for SP1 in range(normal_SP, normal_EP, step_second)]

        for sampNo, index, label_type, new_label, normal_SP, normal_EP in negative_labels[
                                                                          train_eve2 + valid_eve2:train_eve2 + valid_eve2 + test_eve2]:
            label_type = int(label_type)
            test_label += [[sampNo, 10000 + normal_SP // 30, label_type, new_label, SP1, SP1 + window_second]
                           for SP1 in range(normal_SP, normal_EP, step_second)]

        logger.info(sampNo.center(8) + ',' +
                    str(len(train_label)).center(10) + ',' +
                    str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0).center(
                        10) + ',' +
                    str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0).center(
                        10) + ',' +
                    str(len(valid_label)).center(10) + ',' +
                    str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0).center(
                        10) + ',' +
                    str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0).center(
                        10) + ',' +
                    str(len(test_label)).center(10) + ',' +
                    str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0).center(
                        10) + ',' +
                    str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0).center(
                        10) + ',' +
                    str(train_eve).center(10) + ',' + str(valid_eve).center(10) + ',' + str(test_eve).center(10)
                    )

        df2.loc[No] = [sampNo.center(8),
                       str(len(train_label)),
                       str(sum(np.array(train_label)[:, 2].astype(int) > 1) if len(train_label) != 0 else 0),
                       str(sum(np.array(train_label)[:, 2].astype(int) < 1) if len(train_label) != 0 else 0),
                       str(len(valid_label)),
                       str(sum(np.array(valid_label)[:, 2].astype(int) > 1) if len(valid_label) != 0 else 0),
                       str(sum(np.array(valid_label)[:, 2].astype(int) < 1) if len(valid_label) != 0 else 0),
                       str(len(test_label)),
                       str(sum(np.array(test_label)[:, 2].astype(int) > 1) if len(test_label) != 0 else 0),
                       str(sum(np.array(test_label)[:, 2].astype(int) < 1) if len(test_label) != 0 else 0),
                       str(train_eve), str(valid_eve), str(test_eve).center(10)]

    def label_check(label_list):
        # Drop malformed windows in place: shorter than a window, negative
        # start, or a slice that does not cover exactly one window of samples.
        temp_list = []
        for sampNo, index, label_type, new_label, SP, EP in label_list:
            if EP - SP < window_second:
                print(sampNo, index, label_type, SP, EP)
                temp_list.append([sampNo, index, label_type, new_label, SP, EP])

            if SP < 0:
                print(sampNo, index, label_type, SP, EP)
                temp_list.append([sampNo, index, label_type, new_label, SP, EP])

            if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
                print(sampNo, index, label_type, SP, EP, len(select_numpy[SP * 100:EP * 100]))
                temp_list.append([sampNo, index, label_type, new_label, SP, EP])

        for j in temp_list:
            label_list.remove(j)

    label_check(train_label)
    label_check(valid_label)
    label_check(test_label)
    # Final sanity pass over the training windows (print only, no removal)
    for sampNo, index, label_type, new_label, SP, EP in train_label:
        if EP - SP < window_second:
            print(sampNo, index, label_type, new_label, SP, EP)

        if SP < 0:
            print(sampNo, index, label_type, new_label, SP, EP)

        if len(select_numpy[SP * 100:EP * 100]) != window_second * 100:
            print(sampNo, index, label_type, new_label, SP, EP, len(select_numpy[SP * 100:EP * 100]))

    # Write the three splits to CSV
    df1 = pd.DataFrame(data=train_label,
                       columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
    df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_train_label.csv",
               index=False)

    df1 = pd.DataFrame(data=valid_label,
                       columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
    df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_valid_label.csv",
               index=False)

    df1 = pd.DataFrame(data=test_label,
                       columns=["sampNo", "index", "label_type", "new_label", "SP", "EP"])
    df1.to_csv(dataset_save_path / f"{sampNo}_{step_second}s_focal_{window_second}s_sa_test_label.csv", index=False)
if __name__ == '__main__':
    # pool = multiprocessing.Pool(processes=44)
    # pool.map(generate_label, list(all_numpy_dataset))
    # pool.close()
    # pool.join()

    # Summary table; generate_label() appends one row per processed recording.
    df2 = pd.DataFrame(data=None,
                       columns=["sampNo",
                                "train_num", "train_P", "train_N",
                                "valid_num", "valid_P", "valid_N",
                                "test_num", "test_P", "test_N",
                                "train_eve", "valid_eve", "test_eve"])

    # Keep only recordings whose sample number appears in settings.yaml.
    selected_paths = [ds for ds in all_numpy_dataset
                      if int(ds.stem.split("samp")[0]) in select_dataset]

    for row_no, ds_path in enumerate(selected_paths):
        generate_label(row_no, ds_path)

    # Persist the summary next to the per-recording label CSVs.
    df2.to_csv(dataset_save_path / (realtime + ".csv"), index=False)