Matterport3D数据处理

2021-7-3

最近一段时间在做采用Matterport3D数据进行语义分割的实验，由于编写的深度学习模型采用的是（X，Y，Z，R，G，B）格式的点云数据，而Matterport3D为PLY格式的Mesh数据，因此需要将原始PLY数据进行预处理，主要实现的处理功能有：

（0）公共参数

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
读取Region并写入excel
"""
import os
from pathlib import Path
from numpy.lib.utils import source
from plyfile import PlyData
import json
from category import category
import time
import xlwt
import xlrd
import glob
import numpy as np

g_class2color = {
    'bookcase': [10, 200, 100],
    'chair': [255, 0, 0],
    'door': [200, 200, 100],
    'table': [170, 120, 200],
    'wall': [0, 255, 255],
    'window': [100, 100, 255],
    'bed': [50, 100, 50],
    'ceiling': [0, 255, 0],
    'floor': [0, 0, 255],
    'beam': [255, 255, 0],
    'column': [255, 0, 255],
    'sofa': [200, 100, 100],
    'board': [200, 200, 200],
    'cabinet': [100, 50, 100]
}
g_classes = g_class2color.keys()
g_class2label = {cls: i for i,cls in enumerate(g_classes)}

# SOURCE_DIR = os.path.dirname('I:/m3d/v1/scans/')
SOURCE_DIR = os.path.dirname('E:/DataSets/m3d_region_txt/')
OUTPUT_DIR = os.path.dirname('E:/DataSets/m3d_region_npy/')
out_path = Path(OUTPUT_DIR)
out_path.mkdir(exist_ok=True)
scences = [line.rstrip().replace('\n', '')
           for line in open('paths.txt')]  # 场景列表
scences2id = {cls: i+1 for i, cls in enumerate(scences)}  # 场景与序号列表
scences_id = open('scences_id.txt', 'w+')
scences_id.write(scences2id.__str__())  # 保存类别对应信息
scences_id.close()

（1）将Region数据读入到Excel表

# 读取region场景并写入到excel
def read_region_to_xls():
    # 新建Excel表格
    wbk = xlwt.Workbook()
    sheet = wbk.add_sheet('RegionList', cell_overwrite_ok=True)
    row = 0
    column = 0
    for sid in scences2id.keys():
        region_path = os.path.join(
            SOURCE_DIR, '{}/region_segmentations/{}/region_segmentations/'.format(sid, sid))  # region 所在路径
        sheet.write(row, column, sid)
        column = column + 1
        sheet.write(row, column, scences2id[sid])
        row = row + 1
        for file in glob.glob(os.path.join(region_path, '*.ply')):
            file = os.path.basename(file).split('.')[0]
            sheet.write(row, column, file)
            row = row + 1
        column = 0

    wbk.save('region.xlsx')

（2）将PLY格式转换为TXT

# 读取region场景并写入到txt
def read_region_to_txt():
    for sid in scences2id.keys():
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ': |-- '+sid + " Start...")
        house_path = out_path.joinpath(sid)
        house_path.mkdir(exist_ok=True)
        region_list = os.path.join(
            SOURCE_DIR, '{}/region_segmentations/{}/region_segmentations/'.format(sid, sid))  # region 所在路径
        for file in glob.glob(os.path.join(region_list, '*.ply')):
            region = os.path.basename(file).split('.')[0]
            region_path = house_path.joinpath(region)
            region_path.mkdir(exist_ok=True)  # 创建regionpath
            # 1. 读取ply文件
            plydata = PlyData.read(file)
            data = plydata.elements[0].data
            index_dt = {}
            for ln in plydata.elements[1].data:
                if ln[1] in index_dt.keys():
                    pass
                else:
                    index_dt[ln[1]] = []
                index_dt[ln[1]].append(ln[0][0])
                index_dt[ln[1]].append(ln[0][1])
                index_dt[ln[1]].append(ln[0][2])
                index_dt[ln[1]] = list(set(index_dt[ln[1]]))

            # 2. 读取json文件
            json_file = os.path.join(SOURCE_DIR, '{}/region_segmentations/{}/region_segmentations/{}.semseg.json'.format(sid, sid, region))
            f = open(json_file)
            segment_data = json.load(f, strict = False)
            f.close()
            segment_data = segment_data['segGroups']
            class_type = {}
            for segment in segment_data:
                label = segment['label'].replace(' ','_')
                label = label.replace(' ','')
                label = label.replace('/','')
                label = label.replace('?','')
                label = label.replace('\\','')
                label = label.replace('.','')
                label = label.replace(';','')
                if label in class_type.keys():
                    class_type[label] = class_type[label] + 1
                else:
                    class_type[label] = 1
                label_file = os.path.join(region_path,'{}_{}.txt'.format(label, class_type[label]))
                if os.path.exists(label_file):
                    pass
                else:
                    f = open(label_file, 'a+')
                    segment_index = segment['segments']
                    for idx in segment_index:
                        vertex = index_dt[idx]
                        for vtx in vertex:
                            # x y z r g b
                            f.write('{} {} {} {} {} {}\n'.format(data[vtx][0],data[vtx][1],data[vtx][2],data[vtx][8],data[vtx][9],data[vtx][10]))
                    f.close()
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ':    |-- ' + region)
        
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ': |-- ' + sid + " DONE!")

（3）将TXT格式文件转为Numpy的NPY格式

# 读取region场景并写入到npy
def collect_point_label(anno_path, out_filename, file_format='txt'):
    if not os.path.exists(out_filename):
        points_list = []
        for f in glob.glob(os.path.join(anno_path, '*.txt')):
            clss = os.path.basename(f).split('_')
            for cls in clss:
                if cls in g_classes: # note: in some room there is 'staris' class..
                    points = np.loadtxt(f)
                    labels = np.ones((points.shape[0],1)) * g_class2label[cls]
                    points_list.append(np.concatenate([points, labels], 1)) # Nx7
        if len(points_list) > 0:
            data_label = np.concatenate(points_list, 0)
            xyz_min = np.amin(data_label, axis=0)[0:3]
            data_label[:, 0:3] -= xyz_min
            
            if file_format=='txt':
                fout = open(out_filename, 'w')
                for i in range(data_label.shape[0]):
                    fout.write('%f %f %f %d %d %d %d\n' % \
                                (data_label[i,0], data_label[i,1], data_label[i,2],
                                data_label[i,3], data_label[i,4], data_label[i,5],
                                data_label[i,6]))
                fout.close()
            elif file_format=='numpy':
                np.save(out_filename, data_label)
            else:
                print('ERROR!! Unknown file format: %s, please use txt or numpy.' % \
                    (file_format))
                exit()
def read_region_to_npy():
    for sid in scences2id.keys():
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ': |-- '+sid + " Start...")
        region_list = os.listdir(os.path.join(SOURCE_DIR, sid))
        for region in region_list:
            TxtPath = os.path.join(SOURCE_DIR, sid, region)
            out_filename = os.path.join(OUTPUT_DIR, 'Area_{}_{}.npy'.format(sid, region))
            collect_point_label(TxtPath, out_filename, 'numpy')
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ':    |-- ' + region)
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ': |-- ' + sid + " DONE!")

（4）将NPY文件转换为Area_{场景序号}_{场景类型}_{类型编号}格式，方便程序训练

# 读取region并将文件夹名命
def region_name_to_class():
    wb = xlrd.open_workbook('region.xls')
    region2class = {}
    sh = wb.sheet_by_name('region2class')
    for i in range(sh.nrows):
        if sh.cell(i,2).value == '':
            region2class[sh.cell(i,0).value] = {'id':int(sh.cell(i,1).value)}
        else:
            region2class[list(region2class.keys())[-1]][sh.cell(i,1).value]=sh.cell(i,2).value

    # 替换文件名称
    npy_path = os.path.dirname('E:/DataSets/m3d_region_npy/')
    ply_files = os.listdir(npy_path)
    region_count = {}
    for ply_file in ply_files:
        file_arr = ply_file.replace('.npy','').split('_')
        if file_arr[1] not in region_count.keys():
            region_count[file_arr[1]] = {}
        regionkey = region2class[file_arr[1]][file_arr[2]]
        if regionkey not in region_count[file_arr[1]].keys():
            region_count[file_arr[1]][regionkey] = 1
        else:
            region_count[file_arr[1]][regionkey] = region_count[file_arr[1]][regionkey] + 1

        old = os.path.join(npy_path, ply_file)
        to = os.path.join(npy_path, '{}_{}_{}_{}.npy'.format(file_arr[0], region2class[file_arr[1]]['id'], regionkey, region_count[file_arr[1]][regionkey]))
        print(old + ' -> ' + to)
        os.rename(old, to)

阅读剩余

作者：姜九二

链接：https://www.jiangjianwu.cn/research/dataset/752/matterport3d%e6%95%b0%e6%8d%ae%e5%a4%84%e7%90%86.html

文章版权归作者所有，未经允许请勿转载。

THE END