目录
- 数据集
- 软件
- 处理数据集
#### 软件
首先获取 ASAP 软件。
本文基于 Ubuntu 系统,因此下载 deb 安装包。双击安装后,将根目录下的 /opt/ASAP 拷贝到自己建立的操作目录下,便于在 Python 代码中调用:
import sys
sys.path.append("ASAP/bin")
import multiresolutionimageinterface as mir

#### 处理数据集
结果图

所需准备

import os
import re
import json
import xmltodict
import numpy as np
import pandas as pd
import cv2
import sys
sys.path.append("ASAP/bin")
import multiresolutionimageinterface as mir
def read_rol_to_dist(path):
    """Load the lesion annotations stored under *path* as a nested dict.

    On the first call every file in ``<path>/lesion_annotations`` is parsed
    with ``xmltodict`` and the result is cached in ``<path>/annotation.json``.
    When that cache file already exists, it is read back without touching the
    XML files.

    Args:
        path: Dataset root directory that contains ``lesion_annotations``.

    Returns:
        A dict mapping each annotation file stem (the text before the first
        ``.``) to a dict of ``{annotation name: coordinates}``, as loaded from
        the JSON cache.
    """
    save = os.path.join(path, 'annotation.json')
    if not os.path.exists(save):
        folder = os.path.join(path, "lesion_annotations")
        annotation = {}
        for file_name in os.listdir(folder):
            with open(os.path.join(folder, file_name), 'r') as f:
                parsed = xmltodict.parse(f.read())
            # An empty <Annotations/> element is parsed as None, so fall back
            # to "no annotations" instead of failing on the subscript.
            container = parsed["ASAP_Annotations"]["Annotations"] or {}
            entries = container.get("Annotation") or []
            # A single <Annotation> child arrives as one mapping, several as
            # a list; normalise to a list so both are handled the same way.
            if isinstance(entries, dict):
                entries = [entries]
            annotation[file_name.split(".")[0]] = {
                entry['@Name']: entry['Coordinates'] for entry in entries
            }
        with open(save, 'w') as f:
            json.dump(annotation, f, indent=4)
    with open(save, 'r') as ff:
        return json.load(ff)
def need_folder(f_p, f_i):
    """Check that the input folder exists and make sure the output one does.

    Args:
        f_p: Folder holding the extracted patient data; it must already exist.
        f_i: Output folder; created (including missing parents) when absent.

    Raises:
        FileNotFoundError: If *f_p* does not exist. This is a subclass of
            ``Exception``, so callers catching ``Exception`` still work.
    """
    if not os.path.exists(f_p):
        raise FileNotFoundError("请解压缩在:{} 中".format(f_p))
    if not os.path.exists(f_i):
        # makedirs also creates missing parents, and exist_ok tolerates
        # another process creating the folder between the check and the call.
        os.makedirs(f_i, exist_ok=True)
def create_store(f_p, f_i):
    """Mirror the entries of *f_p* as folders inside *f_i*.

    Entries whose name contains a ``.`` are skipped. Every other entry name
    gets a folder of the same name under *f_i*, unless that path already
    exists.

    Args:
        f_p: Folder whose entries are listed.
        f_i: Folder in which the mirrored folders are created.
    """
    for entry in os.listdir(f_p):
        if "." in entry:
            continue
        target = os.path.join(f_i, entry)
        if not os.path.exists(target):
            os.mkdir(target)
def img_have(f_p, f_i):
    """Index the image files found one level below *f_p*.

    Entries of *f_p* whose name contains a ``.`` are ignored; every other
    entry is listed as a folder, and each file inside it is recorded.

    Args:
        f_p: Folder containing one sub-folder per patient.
        f_i: Unused; kept so the call signature stays the same.

    Returns:
        A dict mapping each file stem (the text before the first ``.``) to the
        file's path, built by joining *f_p*, the sub-folder and the file name.
    """
    found = {}
    for patient in os.listdir(f_p):
        if "." in patient:
            continue
        patient_dir = os.path.join(f_p, patient)
        found.update(
            (img.split(".")[0], os.path.join(patient_dir, img))
            for img in os.listdir(patient_dir)
        )
    return found
def handle_c(path):
    """Read ``stage_labels.csv`` in *path* into a name-to-stage lookup.

    Args:
        path: Folder that contains ``stage_labels.csv`` with ``patient`` and
            ``stage`` columns.

    Returns:
        A dict mapping each ``patient`` value, cut at its first ``.``, to the
        ``stage`` value of the same row. Later rows win when stems repeat.
    """
    table = pd.read_csv(os.path.join(path, "stage_labels.csv"))
    stems = [name.split(".")[0] for name in list(table.loc[:, 'patient'])]
    stages = list(table.loc[:, 'stage'])
    return dict(zip(stems, stages))
def calculate_border(coordinate):
    """Compute the bounding box of one annotation's polygon.

    Args:
        coordinate: Mapping whose ``"Coordinate"`` entry is either a list of
            points (mappings with ``"@X"`` and ``"@Y"`` values) or a single
            point mapping.

    Returns:
        ``(1, bbox, x, y)`` for a list of points, where ``bbox`` is
        ``[min_x, min_y, max_x, max_y]`` and ``x`` / ``y`` are the point
        coordinates as floats. ``(0, 0, 0, 0)`` when the entry is a single
        point mapping, which cannot form a polygon.
    """
    c_list = coordinate["Coordinate"]
    # isinstance also recognises dict subclasses such as OrderedDict, which an
    # exact type comparison would send into the point loop and crash there.
    if isinstance(c_list, dict):
        print("存在医生误点,一个标记只有一个坐标")
        print(c_list)
        return 0, 0, 0, 0
    x = [float(point["@X"]) for point in c_list]
    y = [float(point["@Y"]) for point in c_list]
    bbox = [min(x), min(y), max(x), max(y)]
    return 1, bbox, x, y
def calculate_left_top(bbox, extend):
    """Describe the square patch that encloses *bbox* plus a margin.

    Args:
        bbox: ``[min_x, min_y, max_x, max_y]`` of the region.
        extend: Total margin added to the width and to the height; half of it
            is placed before the top-left corner.

    Returns:
        ``[corner, side]`` where ``corner`` is the ``(x, y)`` tuple of the
        patch's top-left corner and ``side`` is the larger of the extended
        width and extended height.
    """
    x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
    margin = extend / 2
    corner = (x_min - margin, y_min - margin)
    side = max((x_max - x_min) + extend, (y_max - y_min) + extend)
    return [corner, side]
def relative_coordinate(bbox, x_c_list, y_c_list, extend):
    """Translate polygon points into the coordinate frame of the cut patch.

    Each point is shifted so the bounding box's top-left corner becomes the
    origin, then moved by half of *extend* to account for the margin.

    Args:
        bbox: ``[min_x, min_y, max_x, max_y]``; only the first two are used.
        x_c_list: X coordinates of the polygon points.
        y_c_list: Y coordinates of the polygon points, same length as
            *x_c_list*.
        extend: Total margin around the bounding box.

    Returns:
        A list of ``[x, y]`` pairs, one per input point.

    Raises:
        ValueError: If the two coordinate lists differ in length.
    """
    # The original assertion compared two freshly created empty lists, so it
    # could never fail; check the actual inputs instead.
    if len(x_c_list) != len(y_c_list):
        raise ValueError(
            "x_c_list and y_c_list must have the same length, got {} and {}".format(
                len(x_c_list), len(y_c_list)
            )
        )
    origin_x, origin_y = bbox[0], bbox[1]
    margin = extend / 2
    return [
        [x - origin_x + margin, y - origin_y + margin]
        for x, y in zip(x_c_list, y_c_list)
    ]
def img_save(image_patch, haved_img, node_key, num, f_p, f_i, classify, source_img):
    """Write one image patch as a JPEG into its patient's output folder.

    The file is named ``<node_key>_<stage>_<num>.jpg`` when *source_img* is 0
    and ``<node_key>_<stage>__<num>.jpg`` (double underscore) otherwise.

    Args:
        image_patch: Image array handed to ``cv2.imwrite``.
        haved_img: Dict mapping node keys to source image paths.
        node_key: Key of the image the patch was cut from.
        num: Annotation number used in the file name.
        f_p: Unused; kept so the call signature stays the same.
        f_i: Output root folder.
        classify: Dict mapping node keys to stage labels.
        source_img: 0 selects the single-underscore name, anything else the
            double-underscore name.
    """
    stage = classify[node_key]
    # Use the name of the folder holding the source image instead of a fixed
    # index into the "/"-split path, which was only right for a dataset root
    # made of exactly one path component.
    patient_dir = os.path.basename(os.path.dirname(haved_img[node_key]))
    separator = "_" if source_img == 0 else "__"
    # str.format also accepts a non-string stage or num.
    save_name = "{}_{}{}{}.jpg".format(node_key, stage, separator, num)
    save_p = os.path.join(f_i, patient_dir, save_name)
    cv2.imwrite(save_p, image_patch)
def json_save(d2_list, haved_img, node_key, num, f_p, f_i, classify):
    """Write a polygon's patch-relative points next to its image patch.

    The file is ``<f_i>/<patient folder>/<node_key>_<stage>_<num>.json`` and
    holds ``{"coordinate": d2_list}``.

    Args:
        d2_list: List of ``[x, y]`` pairs to store.
        haved_img: Dict mapping node keys to source image paths.
        node_key: Key of the image the polygon belongs to.
        num: Annotation number used in the file name.
        f_p: Unused; kept so the call signature stays the same.
        f_i: Output root folder; the patient sub-folder must already exist.
        classify: Dict mapping node keys to stage labels.
    """
    stage = classify[node_key]
    # Use the name of the folder holding the source image instead of a fixed
    # index into the "/"-split path, which was only right for a dataset root
    # made of exactly one path component.
    patient_dir = os.path.basename(os.path.dirname(haved_img[node_key]))
    # str.format also accepts a non-string stage or num.
    save_name = "{}_{}_{}.json".format(node_key, stage, num)
    save_p = os.path.join(f_i, patient_dir, save_name)
    with open(save_p, 'w') as f:
        json.dump({"coordinate": d2_list}, f, indent=4)
def operate(f_p, f_i, node_key, num, coordinate, haved_img, classify):
    """Cut one annotated region out of a slide and save it in three forms.

    For an annotation accepted by ``calculate_border`` (first return value 1)
    this saves, in order: the raw patch via ``img_save``, the patch-relative
    polygon points via ``json_save``, and the same patch with the polygon
    outline drawn on it via ``img_save``. Rejected annotations produce no
    output.

    Args:
        f_p: Patient input folder, forwarded to the save helpers.
        f_i: Output root folder, forwarded to the save helpers.
        node_key: Key into *haved_img* and *classify* for the slide.
        num: Annotation number, forwarded to the save helpers.
        coordinate: Annotation coordinates as accepted by ``calculate_border``.
        haved_img: Dict mapping node keys to slide file paths.
        classify: Dict mapping node keys to stage labels.
    """
    img_name = haved_img[node_key]
    # The slide is opened before the annotation is validated, so it is opened
    # even for annotations that end up being skipped.
    reader = mir.MultiResolutionImageReader()
    mr_image = reader.open(img_name)
    level = 0  # the image is a resolution pyramid; level 0 is the largest
    ds = mr_image.getLevelDownsample(level)
    judge, bbox, x_c_list, y_c_list = calculate_border(coordinate)
    if judge == 1:
        extend = 100  # total margin added around the bounding box
        # position is [(left, top), side] of the square patch to read.
        position = calculate_left_top(bbox, extend)
        source_img = 1  # non-zero: img_save uses the double-underscore name
        # NOTE(review): the corner can be negative when the annotation lies
        # within extend/2 of the slide border - confirm the reader accepts it.
        image_patch = mr_image.getUCharPatch(int(position[0][0]* ds), int(position[0][1]* ds), int(position[1]), int(position[1]), level)
        # Save the raw patch first, before anything is drawn on it.
        # NOTE(review): cv2.imwrite treats the array as BGR - confirm the
        # channel order returned by getUCharPatch.
        img_save(image_patch, haved_img, node_key, num, f_p, f_i, classify, source_img)
        d2_list = relative_coordinate(bbox, x_c_list, y_c_list, extend)
        json_save(d2_list, haved_img, node_key, num, f_p, f_i, classify)
        points=np.array(d2_list,np.int32)  # polygon vertex coordinates
        cv2.polylines(image_patch,[points],True,(0,0,255),thickness = 2)  # draw the closed polygon
        source_img = 0  # zero: img_save uses the single-underscore name
        # cv2.imshow("1", image_patch)
        # cv2.waitKey(0)
        # Save the patch again, now with the outline drawn on it.
        img_save(image_patch, haved_img, node_key, num, f_p, f_i, classify, source_img)
def generate_img(haved_img, annotation, f_p, f_i, classify):
    """Run ``operate`` for every annotation of every available image.

    Images without an entry in *annotation* are skipped. The annotation number
    passed on is the annotation name with all non-digit characters removed.

    Args:
        haved_img: Dict mapping node keys to slide file paths.
        annotation: Dict mapping node keys to ``{annotation name: coordinates}``.
        f_p: Patient input folder, forwarded to ``operate``.
        f_i: Output root folder, forwarded to ``operate``.
        classify: Dict mapping node keys to stage labels.
    """
    for node_key in haved_img:
        if node_key not in annotation:
            continue
        marks = annotation[node_key]
        for mark_name in marks:
            num = re.sub(r'\D', "", mark_name)
            operate(f_p, f_i, node_key, num, marks[mark_name], haved_img, classify)
def cut_img(path, annotation):
    """Cut every annotated region of the dataset under *path* into patches.

    Reads slides from ``<path>/patient`` and writes the results below
    ``<path>/image``, using the stage labels found in *path*.

    Args:
        path: Dataset root directory.
        annotation: Dict mapping node keys to ``{annotation name: coordinates}``.
    """
    patient_root = os.path.join(path, "patient")
    image_root = os.path.join(path, "image")
    # Validate the input and prepare the output tree before any slide is read.
    need_folder(patient_root, image_root)
    create_store(patient_root, image_root)
    generate_img(
        img_have(patient_root, image_root),
        annotation,
        patient_root,
        image_root,
        handle_c(path),
    )
if __name__ == "__main__":
    # Script entry point: load (or build) the annotation cache for the
    # dataset folder "17", then cut out all annotated patches.
    dataset_root = "17"
    cut_img(dataset_root, read_rol_to_dist(dataset_root))
    print("完成skr~skr~")