Skip to content
Snippets Groups Projects
Preprocessing.py 12.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • Anna Neumann's avatar
    Anna Neumann committed
    def prep_whole_dividable(path_load, path_save, firstdim, secdim):
        """Crop every input file to a fixed size and save it as one sample.

        For each of the sub-directories ``Train``, ``Validation`` and ``Test``
        of ``path_load``, every file is loaded with ``np.load`` and its
        ``dbmixed``, ``dbspeech`` and ``dbmusic`` arrays are cropped to
        ``[:firstdim, :secdim]`` and reshaped to ``(1, rows, cols, 1)``.
        The result is written to
        ``<path_save>/<split>/Processed_<i>_whole_dividable.npz`` under the
        keys ``x`` (mixed), ``y`` (speech) and ``z`` (music), where ``<i>`` is
        the position of the file in the sorted directory listing.

        Args:
            path_load: Directory holding the ``Train``/``Validation``/``Test``
                input folders.
            path_save: Directory below which the output folders are created
                (existing folders are reused).
            firstdim: Number of rows kept from each array.
            secdim: Number of columns kept from each array.

        Returns:
            ``path_save``, unchanged.

        Raises:
            IndexError, ValueError: If a file name does not have integer
                fields at positions 2-4 and a field at position 5 when split
                on ``"_"``.
        """
        import os
        import numpy as np

        try:
            from tqdm import tqdm
        except ImportError:
            # The progress bar is purely cosmetic; keep working without tqdm.
            def tqdm(iterable):
                return iterable

        def sort_key(name):
            # Split once instead of once per field: three numeric fields
            # followed by one field compared as text.
            parts = name.split("_")
            return (int(parts[2]), int(parts[3]), int(parts[4]), parts[5])

        def crop(array):
            # Keep the leading firstdim x secdim corner and add a batch axis
            # in front and a channel axis at the end.
            cropped = array[:firstdim, :secdim]
            return np.reshape(cropped, (1, cropped.shape[0], cropped.shape[1], 1))

        for split in ('Train', 'Validation', 'Test'):
            load_dir = os.path.join(path_load, split)
            save_dir = os.path.join(path_save, split)
            os.makedirs(save_dir, exist_ok=True)

            files_all = sorted(os.listdir(load_dir), key=sort_key)
            print(files_all)

            for index, name in enumerate(tqdm(files_all)):
                # Close the archive as soon as the arrays are read so that
                # file handles do not accumulate over a large data set.
                with np.load(os.path.join(load_dir, name)) as contents:
                    train_data = crop(contents['dbmixed'])
                    train_labels_s = crop(contents['dbspeech'])
                    train_labels_m = crop(contents['dbmusic'])

                out_name = 'Processed_' + str(index) + '_whole_dividable.npz'
                np.savez(os.path.join(save_dir, out_name),
                         x=train_data, y=train_labels_s, z=train_labels_m)

        return path_save
    
    def prep_flatten_1weiter(path_load, path_save, skipcount, framelength, option):
        """Build flattened sliding-window samples with a hop of one column.

        For each of the sub-directories ``Train``, ``Validation`` and ``Test``
        of ``path_load``, the sorted files are processed in groups of
        ``skipcount``. From every file the ``dbmixed`` array is cut into
        windows ``[:, n:n + framelength]`` for
        ``n = 0 .. (cols // framelength - 1) * framelength``; each window is
        transposed and flattened to one vector. The target for a window is
        the label column at the window's last position
        (``n + framelength - 1``). All samples of a group are saved to
        ``<path_save>/<split>/Processed_<start>_<count>_flatten_1weiter.npz``
        under the keys ``x`` (samples) and ``y`` (targets), where ``<start>``
        is the index of the group's first file and ``<count>`` the number of
        samples.

        Args:
            path_load: Directory holding the ``Train``/``Validation``/``Test``
                input folders.
            path_save: Directory below which the output folders are created
                (existing folders are reused).
            skipcount: Number of input files combined into one output file.
            framelength: Number of columns per window.
            option: ``'music'`` to take targets from ``dbmusic`` or
                ``'speech'`` to take them from ``dbspeech``.

        Returns:
            ``path_save``, unchanged.

        Raises:
            ValueError: If ``option`` is neither ``'music'`` nor ``'speech'``.
        """
        import os
        import numpy as np

        try:
            from tqdm import tqdm
        except ImportError:
            # The progress bar is purely cosmetic; keep working without tqdm.
            def tqdm(iterable):
                return iterable

        label_keys = {'music': 'dbmusic', 'speech': 'dbspeech'}
        if option not in label_keys:
            # Fail before touching any data: continuing would either hit an
            # unbound label or silently reuse the label of an earlier file.
            raise ValueError(
                "option not right! expected 'music' or 'speech', got %r" % (option,))
        label_key = label_keys[option]

        for split in ('Train', 'Validation', 'Test'):
            load_dir = os.path.join(path_load, split)
            save_dir = os.path.join(path_save, split)
            os.makedirs(save_dir, exist_ok=True)

            files_all = sorted(os.listdir(load_dir))

            for start in range(0, len(files_all), skipcount):
                samples = []
                targets = []

                for name in tqdm(files_all[start:start + skipcount]):
                    # Close the archive right after reading the two arrays.
                    with np.load(os.path.join(load_dir, name)) as contents:
                        fourier = contents['dbmixed']
                        label = contents[label_key]

                    flat_size = fourier.shape[0] * framelength
                    # Window starts stop one full frame before the last
                    # multiple of framelength, so every window is complete.
                    n_starts = (fourier.shape[1] // framelength - 1) * framelength + 1

                    for n in range(n_starts):
                        window = fourier[:, n:n + framelength]
                        samples.append(np.reshape(np.transpose(window), flat_size))
                        targets.append(label[:, n + framelength - 1])

                train_data = np.array(samples)
                train_labels = np.array(targets)

                out_name = ('Processed_' + str(start) + '_' + str(train_data.shape[0])
                            + '_flatten_1weiter.npz')
                np.savez(os.path.join(save_dir, out_name), x=train_data, y=train_labels)

        return path_save
    
    Anna Neumann's avatar
    Anna Neumann committed
    
    def prep_flatten_4weiter(path_load, path_save, skipcount, framelength, option):
        """Build flattened samples from non-overlapping frames.

        For each of the sub-directories ``Train``, ``Validation`` and ``Test``
        of ``path_load``, the sorted files are processed in groups of
        ``skipcount``. From every file the ``dbmixed`` array is cut into
        consecutive frames of ``framelength`` columns (a trailing partial
        frame is dropped); each frame is transposed and flattened to one
        vector. The target for a frame is the label column at the frame's
        last position. All samples of a group are saved to
        ``<path_save>/<split>/Processed_<start>_<count>_<framelength>_flatten.npz``
        under the keys ``x`` (samples) and ``y`` (targets), where ``<start>``
        is the index of the group's first file and ``<count>`` the number of
        samples.

        Args:
            path_load: Directory holding the ``Train``/``Validation``/``Test``
                input folders.
            path_save: Directory below which the output folders are created
                (existing folders are reused).
            skipcount: Number of input files combined into one output file.
            framelength: Number of columns per frame.
            option: ``'music'`` to take targets from ``dbmusic`` or
                ``'speech'`` to take them from ``dbspeech``.

        Returns:
            ``path_save``, unchanged.

        Raises:
            ValueError: If ``option`` is neither ``'music'`` nor ``'speech'``.
        """
        import os
        import numpy as np

        try:
            from tqdm import tqdm
        except ImportError:
            # The progress bar is purely cosmetic; keep working without tqdm.
            def tqdm(iterable):
                return iterable

        label_keys = {'music': 'dbmusic', 'speech': 'dbspeech'}
        if option not in label_keys:
            # Fail before touching any data: continuing would either hit an
            # unbound label or silently reuse the label of an earlier file.
            raise ValueError(
                "option not right! expected 'music' or 'speech', got %r" % (option,))
        label_key = label_keys[option]

        for split in ('Train', 'Validation', 'Test'):
            load_dir = os.path.join(path_load, split)
            save_dir = os.path.join(path_save, split)
            os.makedirs(save_dir, exist_ok=True)

            files_all = sorted(os.listdir(load_dir))

            for start in tqdm(range(0, len(files_all), skipcount)):
                samples = []
                targets = []

                for name in tqdm(files_all[start:start + skipcount]):
                    # Close the archive right after reading the two arrays.
                    with np.load(os.path.join(load_dir, name)) as contents:
                        fourier = contents['dbmixed']
                        label = contents[label_key]
                    print(fourier.shape)

                    flat_size = fourier.shape[0] * framelength

                    for n in range(fourier.shape[1] // framelength):
                        first = framelength * n
                        frame = fourier[:, first:first + framelength]
                        samples.append(np.reshape(np.transpose(frame), flat_size))

                    # The number of targets is derived from the label array
                    # itself; x and y only line up when both arrays have the
                    # same number of columns.
                    for n in range(label.shape[1] // framelength):
                        targets.append(label[:, framelength * n + framelength - 1])

                train_data = np.array(samples)
                train_labels = np.array(targets)

                out_name = ('Processed_' + str(start) + '_' + str(train_data.shape[0])
                            + '_' + str(framelength) + '_flatten.npz')
                np.savez(os.path.join(save_dir, out_name), x=train_data, y=train_labels)

        return path_save
    
    def prep_framelength(path_load, path_save, framelength):
        """Cut every input file into overlapping frames with a hop of one column.

        For each of the sub-directories ``Train``, ``Validation`` and ``Test``
        of ``path_load``, every file is loaded with ``np.load``. The
        ``dbmixed`` array is cut into all windows ``[:, n:n + framelength]``
        that fit completely, giving ``x`` of shape
        ``(frames, rows, framelength, 1)``. For each window the column at its
        last position is taken from ``dbspeech`` and ``dbmusic``, giving ``y``
        and ``z`` of shape ``(frames, rows, 1, 1)``. The result is written to
        ``<path_save>/<split>/Processed_<i>_<framelength>_frames.npz``, where
        ``<i>`` is the position of the file in the sorted directory listing.

        Args:
            path_load: Directory holding the ``Train``/``Validation``/``Test``
                input folders.
            path_save: Directory below which the output folders are created
                (existing folders are reused).
            framelength: Number of columns per frame.

        Returns:
            ``path_save``, unchanged.

        Raises:
            IndexError: If a file has fewer than ``framelength`` columns (no
                frame fits, so the sample array has no second axis), or if a
                file name has fewer than six ``"_"``-separated fields.
            ValueError: If fields 2-4 of a file name are not integers.
        """
        import os
        import numpy as np

        try:
            from tqdm import tqdm
        except ImportError:
            # The progress bar is purely cosmetic; keep working without tqdm.
            def tqdm(iterable):
                return iterable

        def sort_key(name):
            # Split once instead of once per field: three numeric fields
            # followed by one field compared as text.
            parts = name.split("_")
            return (int(parts[2]), int(parts[3]), int(parts[4]), parts[5])

        for split in ('Train', 'Validation', 'Test'):
            load_dir = os.path.join(path_load, split)
            save_dir = os.path.join(path_save, split)
            os.makedirs(save_dir, exist_ok=True)

            files_all = sorted(os.listdir(load_dir), key=sort_key)
            print(files_all)

            for index, name in enumerate(tqdm(files_all)):
                # Close the archive as soon as the arrays are read so that
                # file handles do not accumulate over a large data set.
                with np.load(os.path.join(load_dir, name)) as contents:
                    fourier = contents['dbmixed']
                    label_s = contents['dbspeech']
                    label_m = contents['dbmusic']

                # One frame per start column for which a full window fits.
                starts = range(fourier.shape[1] - (framelength - 1))
                frames = [fourier[:, n:n + framelength] for n in starts]
                speech = [label_s[:, n + framelength - 1] for n in starts]
                music = [label_m[:, n + framelength - 1] for n in starts]

                train_data = np.array(frames)
                train_data = np.reshape(
                    train_data,
                    (train_data.shape[0], train_data.shape[1], train_data.shape[2], 1))
                train_labels_s = np.array(speech)
                train_labels_s = np.reshape(
                    train_labels_s,
                    (train_labels_s.shape[0], train_labels_s.shape[1], 1, 1))
                train_labels_m = np.array(music)
                train_labels_m = np.reshape(
                    train_labels_m,
                    (train_labels_m.shape[0], train_labels_m.shape[1], 1, 1))

                out_name = 'Processed_' + str(index) + '_' + str(framelength) + '_frames.npz'
                np.savez(os.path.join(save_dir, out_name),
                         x=train_data, y=train_labels_s, z=train_labels_m)

        return path_save