# encoding:utf-8

import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors

from scipy import signal
from glob import glob
from torch.utils.data import Dataset, DataLoader, TensorDataset

warnings.filterwarnings("ignore")


class BCGDataset(Dataset):
    """Dataset of 1000-sample BCG segments and their 1000-sample labels.

    Each row of train.txt / test.txt holds 2000 values: columns 0-999 are the
    signal segment and columns 1000-1999 are the corresponding label sequence.
    """
    def __init__(self, train=True):
        path = "./in_data/train.txt" if train else "./in_data/test.txt"
        df = pd.read_csv(path)                          # read the file once instead of twice
        self.data = np.array(df.iloc[:, np.arange(1000)])
        self.label = np.array(df.iloc[:, np.arange(1000, 2000)])

    def __getitem__(self, index):
        return self.data[index], self.label[index]

    def __len__(self):
        return len(self.label)

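
# Usage sketch (illustrative, not part of the original pipeline): wrap BCGDataset in a
# torch DataLoader for mini-batch iteration. The batch size is an assumption, and the
# CSV files under ./in_data/ must already exist for this to run.
def _demo_bcg_dataloader():
    loader = DataLoader(BCGDataset(train=True), batch_size=32, shuffle=True)
    for batch_data, batch_label in loader:
        # each batch holds 1000-sample signal segments and their 1000-sample labels
        print(batch_data.shape, batch_label.shape)
        break
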
class BCG_Operation:
    """Signal-processing helpers for raw BCG recordings.

    Usage sketches for the individual methods follow the class definition.
    """
    def __init__(self, sample_rate=1000):
        self.sample_rate = sample_rate

    def down_sample(self, data=None, down_radio=10):
        """Decimate the signal by keeping every down_radio-th sample."""
        if data is None:
            raise ValueError("data is None, please give a real value!")
        length_before = len(data)
        length_after = length_before // down_radio
        data = data[:length_after * down_radio]
        data = data.reshape(-1, down_radio)
        data = data[:, 0]
        self.sample_rate = self.sample_rate / down_radio
        return data

    def Splitwin(self, data=None, len_win=None, coverage=1.0, calculate_to_end=False):
        """
        Split the signal into windows.
        :param len_win:           length of a window in seconds
        :param coverage:          step size as a fraction of the window length
                                  (1.0 means non-overlapping windows)
        :param calculate_to_end:  also emit one final window aligned to the end of the signal
        :return:                  signal windows (and, when calculate_to_end is True, the number
                                  of trailing 2000-sample blocks of the final window that are
                                  not covered by the previous one)
        """
        if (len_win is None) or (data is None):
            raise ValueError("length of window or data is None, please give a real value!")
        length = len_win * self.sample_rate   # number of points in a window
        # step between consecutive windows
        step = length * coverage
        start = 0
        Splitdata = []
        while (len(data) - start >= length):
            Splitdata.append(data[int(start):int(start + length)])
            start += step
        if calculate_to_end and (len(data) - start > 2000):
            # append one last window covering the tail of the signal and report how many
            # trailing 2000-sample blocks of it were not covered by the previous window
            remain = len(data) - start
            start = start - step
            step = int(remain / 2000)
            start = start + step * 2000
            Splitdata.append(data[int(start):int(start + length)])
            return np.array(Splitdata), step
        elif calculate_to_end:
            return np.array(Splitdata), 0
        else:
            return np.array(Splitdata)

    def Butterworth(self, data, type, low_cut=0.0, high_cut=0.0, order=10):
        """
        :param type:      type of Butterworth filter: "lowpass", "bandpass" or "highpass"
        :param low_cut:   low cutoff frequency
        :param high_cut:  high cutoff frequency
        :param order:     order of the filter
        :return:          signal after zero-phase filtering (filtfilt)
        """
        if type == "lowpass":     # low-pass filtering
            b, a = signal.butter(order, low_cut / (self.sample_rate * 0.5), btype='lowpass')
            return signal.filtfilt(b, a, np.array(data))
        elif type == "bandpass":  # band-pass filtering
            low = low_cut / (self.sample_rate * 0.5)
            high = high_cut / (self.sample_rate * 0.5)
            b, a = signal.butter(order, [low, high], btype='bandpass')
            return signal.filtfilt(b, a, np.array(data))
        elif type == "highpass":  # high-pass filtering
            b, a = signal.butter(order, high_cut / (self.sample_rate * 0.5), btype='highpass')
            return signal.filtfilt(b, a, np.array(data))
        else:                     # a valid filter type is required
            raise ValueError("Please choose a type of filter")

    def AmpMovement(self, data, win_size, threshold=20, get_judge_line=False):
        """
        Amplitude-based body-movement detection:
            1. Split the input signal into segments of win_size seconds.
            2. Split each segment into 2 s windows with a 2 s step.
            3. Compute the peak-to-valley amplitude (max - min) of every 2 s window
               and take the 20th percentile of these amplitudes as the reference.
            4. A 2 s window whose amplitude exceeds 2.1 times the reference is
               labelled as body movement.
            5. A 2 s window whose amplitude is below `threshold` is labelled "Nobody".
        :param data:            input signal
        :param win_size:        segment size in seconds (must be a multiple of 2)
        :param threshold:       amplitude below which the bed is considered empty
        :param get_judge_line:  also return the per-sample decision line (2.3 * reference)
        :return:                state of every 2 s window ("Movement", "Sleep" or "Nobody")
        """
        Dataframe, cover_num = self.Splitwin(data, len_win=win_size, coverage=1.0, calculate_to_end=True)
        state_all = np.array([])
        Amp_list = np.array([])
        for win in range(Dataframe.shape[0]):
            state = np.array([])
            # two-second windows
            data_win = self.Splitwin(Dataframe[win], len_win=2, coverage=1.0)
            Amp = np.zeros(data_win.shape[0])
            for i in range(data_win.shape[0]):
                Amp[i] = np.max(data_win[i]) - np.min(data_win[i])  # peak-to-valley amplitude
            # reference amplitude: 20th percentile of the window amplitudes
            Median_Amp = np.percentile(Amp, 20)
            if get_judge_line:
                Amp_list = np.append(Amp_list, np.full(win_size * self.sample_rate, 2.3 * Median_Amp))

            for i in range(len(Amp)):
                if (Amp[i] > 2.1 * Median_Amp):
                    state = np.append(state, "Movement")
                elif Amp[i] < threshold:
                    state = np.append(state, "Nobody")
                else:
                    state = np.append(state, "Sleep")

            # the last segment overlaps the previous one; keep only the new windows
            if win == Dataframe.shape[0] - 1 and cover_num > 0:
                state = state[-int(cover_num):]

            state_all = np.append(state_all, state)

        if get_judge_line:
            return state_all, Amp_list
        else:
            return state_all

    def preprocess1(self):
        """Build one CSV per recording: 1000-sample BCG segments, labels and a movement state."""
        # ----------------------------------------------------------
        data_dir = "../in_data/"
        dir_list = os.listdir(data_dir)

        data_list = [data_dir + folder + "/orgData.txt" for folder in dir_list]
        label_list = [data_dir + folder + "/label.txt" for folder in dir_list]
        print(data_list)
        print(label_list)
        for i in range(len(data_list)):
            orgBCG = np.array(pd.read_csv(data_list[i], header=None)).reshape(-1)
            orgLabel = np.array(pd.read_csv(label_list[i])).reshape(-1)

            # ---------------------Movement Detection-------------------------
            operation = BCG_Operation()
            BCG = operation.Butterworth(data=orgBCG, type="bandpass", low_cut=2.5, high_cut=10, order=2)
            # note: movement detection runs on the raw signal, not the filtered one
            state_win60 = operation.AmpMovement(orgBCG, win_size=60)

            # expand the per-2 s states into per-sample 0/1 flags for later downsampling
            visual_state = np.array([])
            for num in range(state_win60.shape[0]):
                print("state_num/all_state: ", num, '/', state_win60.shape[0])
                if state_win60[num] == "Movement":
                    visual_state = np.append(visual_state, np.full(2000, 1))
                else:
                    visual_state = np.append(visual_state, np.full(2000, 0))

            # ------------------------------------------------------------------
            downBCG = operation.down_sample(data=orgBCG, down_radio=10)
            downLabel = operation.down_sample(data=orgLabel, down_radio=10)
            downState = operation.down_sample(data=visual_state, down_radio=10)

            # trim to a multiple of 1000 samples and reshape into 1000-sample segments
            length_before = len(downState)
            length_after = length_before // 1000
            downBCG = downBCG[:length_after * 1000]
            downLabel = downLabel[:length_after * 1000]
            downState = downState[:length_after * 1000]

            downBCG = downBCG.reshape(-1, 1000)
            downLabel = downLabel.reshape(-1, 1000)
            downState = downState.reshape(-1, 1000)
            downState = np.max(downState, axis=1)   # a segment counts as movement if any of its samples do

            df_BCG = pd.DataFrame(downBCG)
            df_label = pd.DataFrame(downLabel)
            df_state = pd.DataFrame(downState, columns=["state"])

            df_all = pd.concat([df_BCG, df_label, df_state], axis=1)
            df_all.to_csv(data_dir + "/data" + str(i + 1) + ".txt", index=False)

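
# Usage sketches (illustrative, not part of the original pipeline). They exercise the
# BCG_Operation methods on synthetic data; all signal values, durations and burst
# locations below are assumptions chosen only to make the calls runnable.
def _demo_down_sample():
    # decimate a 1000 Hz signal to 100 Hz by keeping every 10th sample
    op = BCG_Operation(sample_rate=1000)
    sig = np.arange(10050, dtype=float)
    down = op.down_sample(sig, down_radio=10)
    print(down.shape, op.sample_rate)        # expected: (1005,) 100.0


def _demo_splitwin():
    # split 10 s of a 1 Hz sine sampled at 100 Hz into non-overlapping 2 s windows
    op = BCG_Operation(sample_rate=100)
    sig = np.sin(2 * np.pi * np.arange(1000) / 100)
    wins = op.Splitwin(sig, len_win=2, coverage=1.0)
    print(wins.shape)                        # expected: (5, 200)


def _demo_butterworth():
    # band-pass filter a noisy 5 Hz tone the same way preprocess1 filters raw BCG (2.5-10 Hz)
    op = BCG_Operation(sample_rate=1000)
    t = np.arange(10 * 1000) / 1000.0
    raw = np.sin(2 * np.pi * 5 * t) + 0.5 * np.random.randn(t.size)
    filtered = op.Butterworth(raw, type="bandpass", low_cut=2.5, high_cut=10, order=2)
    print(filtered.shape)                    # same length as the input


def _demo_ampmovement():
    # run the amplitude-based movement detector on 3 minutes of synthetic signal
    op = BCG_Operation(sample_rate=1000)
    sig = 50 + 5 * np.random.randn(180 * 1000)   # quiet baseline
    sig[61000:69000] += 200                      # simulated body-movement burst
    states = op.AmpMovement(sig, win_size=60)    # one label per 2 s window
    print(states.shape, np.unique(states))
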
def read_all_data(data_dir):
    """Load a merged CSV and split it into clean and movement-artifact segments."""
    df_all = pd.read_csv(data_dir)
    df_clean = df_all[df_all["state"] == 0.0]
    df_artifact = df_all[df_all["state"] == 1.0]
    data_clean = df_clean.iloc[:, np.arange(1000)]
    label_clean = df_clean.iloc[:, np.arange(1000, 2000)]
    data_artifact = df_artifact.iloc[:, np.arange(1000)]
    label_artifact = df_artifact.iloc[:, np.arange(1000, 2000)]

    return np.array(data_clean), np.array(label_clean), np.array(data_artifact), np.array(label_artifact)

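
# Usage sketch (illustrative): load the merged file written by preprocess1 and report how
# many clean and movement-artifact segments it contains. The path is an assumption and must
# point to a CSV with 2000 signal/label columns plus a "state" column.
def _demo_read_all_data():
    data_clean, label_clean, data_artifact, label_artifact = read_all_data("../in_data/all_data.txt")
    print("clean segments:", data_clean.shape[0])
    print("artifact segments:", data_artifact.shape[0])
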
# Scratch code kept for reference: single-recording preprocessing (same steps as preprocess1)
#orgBCG = np.array(pd.read_csv("../in_data/data1zuo/orgData.txt", header=None)).reshape(-1)
#orgLabel = np.array(pd.read_csv("../in_data/data1zuo/label.txt")).reshape(-1)
## ---------------------Movement Detection-------------------------
#operation = BCG_Operation()
#BCG = operation.Butterworth(data=orgBCG, type="bandpass", low_cut=2.5, high_cut=10, order=2)
#state_win60 = operation.AmpMovement(orgBCG, win_size=60)
#visual_state = np.array([])
#for num in range(state_win60.shape[0]):
#    print("state_num/all_state: ", num, '/', state_win60.shape[0])
#    if state_win60[num] == "Movement":
#        visual_state = np.append(visual_state, np.full(2000, 1))
#    else:
#        visual_state = np.append(visual_state, np.full(2000, 0))
## ------------------------------------------------------------------
#downBCG = operation.down_sample(data=orgBCG, down_radio=10)
#downLabel = operation.down_sample(data=orgLabel, down_radio=10)
#downState = operation.down_sample(data=visual_state, down_radio=10)
#length_before = len(downState)
#length_after = length_before // 1000
#downBCG = downBCG[:length_after * 1000]
#downLabel = downLabel[:length_after * 1000]
#downState = downState[:length_after * 1000]
#downBCG = downBCG.reshape(-1, 1000)
#downLabel = downLabel.reshape(-1, 1000)
#downState = downState.reshape(-1, 1000)
#downState = np.max(downState, axis=1)
#df_BCG = pd.DataFrame(downBCG)
#df_label = pd.DataFrame(downLabel)
#df_state = pd.DataFrame(downState, columns=["state"])
#df_all = pd.concat([df_BCG, df_label, df_state], axis=1)
#df_all.to_csv("../in_data/data1zuo.txt", index=False)


# Scratch code kept for reference: merge the per-recording files into one all_data.txt
#data_dir = glob("../in_data/*.txt")
#print(data_dir)
#for num in range(len(data_dir)):
#    if num == 0:
#        all_data = pd.read_csv(data_dir[num])
#    else:
#        all_data = pd.concat([all_data, pd.read_csv(data_dir[num])], ignore_index=True, axis=0)
#
#all_data.to_csv("../in_data/all_data.txt", index=False)


# Scratch code kept for reference: split the clean data into train/test sets (7:3);
# all movement-artifact segments go into the test set
#data = pd.read_csv("../in_data/all_data.txt")
#clean_data = data[data["state"] == 0]
#Movement_data = data[data["state"] == 1]
#print(data.shape)
#print(clean_data.shape)
#
## --------------------  split into training and test sets 7:3  ----------------------------
#sample = clean_data.sample(int(0.3 * len(clean_data)))
#sample_index = sample.index
#print(sample.shape)
#print(sample_index)
## remaining data
#all_index = clean_data.index
## data left after removing the sample
#residue_index = all_index.difference(sample_index)
#print(residue_index.shape)
#print(residue_index)
#residue = clean_data.loc[residue_index]
## save
#test = pd.concat([sample, Movement_data], ignore_index=True)
#test.to_csv("../in_data/test.txt", index=False)
#residue.to_csv("../in_data/train.txt", index=False)
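
# Usage sketch (illustrative): the same 7:3 clean-data split as the scratch code above,
# wrapped in a function. File locations are assumptions carried over from that code.
def _demo_train_test_split(all_data_path="../in_data/all_data.txt"):
    data = pd.read_csv(all_data_path)
    clean_data = data[data["state"] == 0]
    movement_data = data[data["state"] == 1]
    # 30% of the clean segments plus every movement segment form the test set
    sample = clean_data.sample(int(0.3 * len(clean_data)))
    residue = clean_data.loc[clean_data.index.difference(sample.index)]
    test = pd.concat([sample, movement_data], ignore_index=True)
    test.to_csv("../in_data/test.txt", index=False)
    residue.to_csv("../in_data/train.txt", index=False)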