Camelyon Challenge:癌症细胞区域检测_数据预处理_ASAP



目录

  • 数据集
  • 软件
  • 处理数据集

#### 数据集
从官方网站获取数据集;国内用户也可以选择通过百度网盘下载。

#### 软件
获取软件 ASAP
本文基于 Ubuntu 系统,因此下载 deb 文件。双击安装后,将 /opt/ASAP 目录拷贝到自己建立的工作目录下,便于在 Python 代码中调用。

import sys
sys.path.append("ASAP/bin")
import multiresolutionimageinterface as mir

#### 处理数据集

结果图

所需准备

import os
import re
import json
import xmltodict
import numpy as np
import pandas as pd

import cv2
import sys
sys.path.append("ASAP/bin")
import multiresolutionimageinterface as mir

def read_rol_to_dist(path):
    """Load lesion annotations as a nested dict, caching them as JSON.

    On the first call every file in ``<path>/lesion_annotations`` is parsed
    with ``xmltodict`` and the collected result is written to
    ``<path>/annotation.json``.  Whenever that cache file exists it is the
    only thing read, so the return value always consists of plain
    JSON-decoded objects.

    Args:
        path: Dataset root directory that contains ``lesion_annotations``.

    Returns:
        dict mapping each annotation file's stem (the text before the first
        ``.``) to ``{annotation "@Name": its "Coordinates" element}``.  A file
        without any annotation maps to an empty dict.
    """
    save = os.path.join(path, 'annotation.json')
    if not os.path.exists(save):
        folder = os.path.join(path, "lesion_annotations")
        annotation = {}
        for file_name in os.listdir(folder):
            with open(os.path.join(folder, file_name), 'r') as f:
                data = xmltodict.parse(f.read())

            # xmltodict represents an empty element as None; treat that as
            # "no annotations" instead of failing on None["Annotation"].
            container = data["ASAP_Annotations"]["Annotations"] or {}
            judge = container.get("Annotation") or []
            # A single <Annotation> child is parsed as one mapping rather
            # than a list of mappings; normalise to a list.
            if isinstance(judge, dict):
                judge = [judge]
            annotation[file_name.split(".")[0]] = {
                item['@Name']: item['Coordinates'] for item in judge
            }

        with open(save, 'w') as f:
            json.dump(annotation, f, indent=4)

    with open(save, 'r') as ff:
        return json.load(ff)

def need_folder(f_p, f_i):
    """Check that the input folder exists and make sure the output folder does.

    Args:
        f_p: Folder that must already exist (the extracted patient data).
        f_i: Output folder; created when it is missing.

    Raises:
        Exception: If ``f_p`` does not exist.
    """
    source_present = os.path.exists(f_p)
    if not source_present:
        message = "请解压缩在:{} 中".format(f_p)
        raise Exception(message)
    if os.path.exists(f_i):
        return
    os.mkdir(f_i)

def create_store(f_p, f_i):
    """Create one output folder under ``f_i`` per patient folder in ``f_p``.

    An entry of ``f_p`` counts as a patient folder when its name contains no
    ``.`` and it is an actual directory.  Output folders that already exist
    are left untouched.

    Args:
        f_p: Folder holding one sub-folder per patient.
        f_i: Existing output folder that receives the mirrored sub-folders.
    """
    for entry in os.listdir(f_p):
        # The dot rule alone would also match extensionless regular files
        # and create a useless output folder for them.
        if "." in entry or not os.path.isdir(os.path.join(f_p, entry)):
            continue
        target = os.path.join(f_i, entry)
        if not os.path.exists(target):
            os.mkdir(target)

def img_have(f_p, f_i):
    """Index every file found inside the patient folders of ``f_p``.

    An entry of ``f_p`` is scanned when its name contains no ``.`` and it is
    an actual directory.

    Args:
        f_p: Folder holding one sub-folder per patient.
        f_i: Unused; kept so the signature stays compatible with callers.

    Returns:
        dict mapping each file's stem (the text before the first ``.``) to
        its path.  When two files share a stem, the one listed last wins.
    """
    haved_img = {}
    for patient in os.listdir(f_p):
        patient_dir = os.path.join(f_p, patient)
        # Without the isdir check an extensionless regular file would make
        # os.listdir raise NotADirectoryError.
        if "." in patient or not os.path.isdir(patient_dir):
            continue
        for image_name in os.listdir(patient_dir):
            haved_img[image_name.split(".")[0]] = os.path.join(patient_dir, image_name)
    return haved_img

def handle_c(path):
    """Read ``<path>/stage_labels.csv`` into a name -> stage lookup.

    Args:
        path: Folder containing ``stage_labels.csv`` with the columns
            ``patient`` and ``stage``.

    Returns:
        dict whose keys are the ``patient`` values cut at the first ``.``
        and whose values are the matching ``stage`` entries.
    """
    table = pd.read_csv(os.path.join(path, "stage_labels.csv"))
    # Drop everything from the first dot on (e.g. the file extension).
    names = [entry.split(".")[0] for entry in list(table.loc[:, 'patient'])]
    stages = list(table.loc[:, 'stage'])
    return dict(zip(names, stages))

def calculate_border(coordinate):
    """Compute the bounding box of one annotation's vertices.

    Args:
        coordinate: Mapping with a ``"Coordinate"`` entry.  That entry is
            either a sequence of mappings carrying ``"@X"`` / ``"@Y"``, or a
            single such mapping when the annotation has only one point.

    Returns:
        ``(1, [min_x, min_y, max_x, max_y], xs, ys)`` for a multi-point
        annotation, where ``xs`` / ``ys`` are the vertex coordinates as
        floats, or ``(0, 0, 0, 0)`` for a single-point annotation (which is
        reported on stdout and otherwise ignored).
    """
    c_list = coordinate["Coordinate"]
    # isinstance also recognises dict subclasses such as OrderedDict, which
    # an exact type comparison would wrongly iterate key by key.
    if isinstance(c_list, dict):
        print("存在医生误点,一个标记只有一个坐标")
        print(c_list)
        return 0, 0, 0, 0

    x = [float(point["@X"]) for point in c_list]
    y = [float(point["@Y"]) for point in c_list]
    bbox = [min(x), min(y), max(x), max(y)]
    return 1, bbox, x, y

def calculate_left_top(bbox, extend):
    """Derive the square crop window that surrounds a bounding box.

    Args:
        bbox: ``[min_x, min_y, max_x, max_y]``.
        extend: Total padding added to both width and height; half of it is
            placed before the box's top-left corner.

    Returns:
        ``[(left, top), side]`` where ``(left, top)`` is the box's top-left
        corner shifted by ``extend / 2`` and ``side`` is the larger of the
        padded width and padded height.
    """
    margin = extend / 2
    corner = (bbox[0] - margin, bbox[1] - margin)
    padded_width = (bbox[2] - bbox[0]) + extend
    padded_height = (bbox[3] - bbox[1]) + extend
    return [corner, max(padded_width, padded_height)]

def relative_coordinate(bbox, x_c_list, y_c_list, extend):
    """Translate vertices into the coordinate frame of the padded crop.

    Each vertex is shifted so that the bounding box's top-left corner lands
    at ``(extend / 2, extend / 2)``.

    Args:
        bbox: ``[min_x, min_y, max_x, max_y]``; only the first two entries
            are used.
        x_c_list: X coordinates of the vertices.
        y_c_list: Y coordinates of the vertices, same length as ``x_c_list``.
        extend: Total padding of the crop; half of it is added as offset.

    Returns:
        List of ``[x, y]`` pairs, one per vertex.

    Raises:
        ValueError: If the two coordinate lists differ in length.
    """
    # The original asserted on two freshly created (always empty) lists,
    # which could never fail; validate the actual inputs instead.
    if len(x_c_list) != len(y_c_list):
        raise ValueError(
            "x_c_list and y_c_list must have the same length, got {} and {}".format(
                len(x_c_list), len(y_c_list)
            )
        )
    origin_x, origin_y = bbox[0], bbox[1]
    return [
        [x - origin_x + extend / 2, y - origin_y + extend / 2]
        for x, y in zip(x_c_list, y_c_list)
    ]

def img_save(image_patch, haved_img, node_key, num, f_p, f_i, classify, source_img):
    """Write one image patch as a JPEG into its patient's output folder.

    The file is named ``<node_key>_<stage>_<num>.jpg`` when ``source_img`` is
    0 and ``<node_key>_<stage>__<num>.jpg`` (double underscore) otherwise.

    Args:
        image_patch: Image array handed to ``cv2.imwrite``.
        haved_img: Mapping from node key to the path of its source image.
        node_key: Key of the slide the patch was cut from.
        num: Annotation number as a string.
        f_p: Unused; kept so the signature stays compatible with callers.
        f_i: Root output folder.
        classify: Mapping from node key to its stage label (a string).
        source_img: 0 selects the single-underscore name, any other value
            the double-underscore name.
    """
    stage = classify[node_key]
    # The output sub-folder is the folder that directly contains the source
    # image.  Indexing a "/"-split path at a fixed position only worked when
    # the dataset root was a single relative path component.
    patient_folder = os.path.basename(os.path.dirname(haved_img[node_key]))
    separator = "_" if source_img == 0 else "__"
    save_name = node_key + "_" + stage + separator + num + ".jpg"
    save_p = os.path.join(f_i, patient_folder, save_name)
    cv2.imwrite(save_p, image_patch)

def json_save(d2_list, haved_img, node_key, num, f_p, f_i, classify):
    """Write an annotation's vertex list as JSON into its patient's folder.

    The file is named ``<node_key>_<stage>_<num>.json`` and holds
    ``{"coordinate": d2_list}``.

    Args:
        d2_list: List of ``[x, y]`` pairs to store.
        haved_img: Mapping from node key to the path of its source image.
        node_key: Key of the slide the annotation belongs to.
        num: Annotation number as a string.
        f_p: Unused; kept so the signature stays compatible with callers.
        f_i: Root output folder; the patient sub-folder must already exist.
        classify: Mapping from node key to its stage label (a string).
    """
    stage = classify[node_key]
    # The output sub-folder is the folder that directly contains the source
    # image.  Indexing a "/"-split path at a fixed position only worked when
    # the dataset root was a single relative path component.
    patient_folder = os.path.basename(os.path.dirname(haved_img[node_key]))
    save_name = node_key + "_" + stage + "_" + num + ".json"
    save_p = os.path.join(f_i, patient_folder, save_name)
    with open(save_p, 'w') as f:
        json.dump({"coordinate": d2_list}, f, indent=4)

def operate(f_p, f_i, node_key, num, coordinate, haved_img, classify):
    """Cut one annotated region out of a slide and store it three ways.

    For an annotation with more than one point this writes, in order: the
    raw patch, a JSON file with the polygon in patch coordinates, and the
    same patch with the polygon outline drawn on it.  Single-point
    annotations produce no output.

    Args:
        f_p: Patient folder, forwarded to the save helpers.
        f_i: Output folder, forwarded to the save helpers.
        node_key: Key of the slide in ``haved_img`` / ``classify``.
        num: Annotation number as a string, used in the file names.
        coordinate: The annotation's coordinates element.
        haved_img: Mapping from node key to the slide's file path.
        classify: Mapping from node key to its stage label.
    """
    slide_path = haved_img[node_key]
    reader = mir.MultiResolutionImageReader()
    slide = reader.open(slide_path)
    level = 0   # pyramid image; per the original author, level 0 is the largest
    ds = slide.getLevelDownsample(level)

    ok, bbox, xs, ys = calculate_border(coordinate)
    if ok != 1:
        # single-point annotation: nothing to crop
        return

    extend = 100
    (left, top), side = calculate_left_top(bbox, extend)
    patch = slide.getUCharPatch(int(left * ds), int(top * ds), int(side), int(side), level)

    # 1) untouched patch (non-zero flag -> double-underscore file name)
    img_save(patch, haved_img, node_key, num, f_p, f_i, classify, 1)

    # 2) polygon expressed relative to the patch
    outline = relative_coordinate(bbox, xs, ys, extend)
    json_save(outline, haved_img, node_key, num, f_p, f_i, classify)

    # 3) same patch with the closed polygon drawn onto it, 2 px thick
    vertices = np.array(outline, np.int32)
    cv2.polylines(patch, [vertices], True, (0, 0, 255), thickness=2)
    img_save(patch, haved_img, node_key, num, f_p, f_i, classify, 0)

def generate_img(haved_img, annotation, f_p, f_i, classify):
    """Run ``operate`` for every annotation of every slide that has one.

    Args:
        haved_img: Mapping from node key to the slide's file path.
        annotation: Mapping from node key to ``{annotation name: coordinates}``.
        f_p: Patient folder, forwarded to ``operate``.
        f_i: Output folder, forwarded to ``operate``.
        classify: Mapping from node key to its stage label.
    """
    for node_key in haved_img:
        # slides without an annotation entry are skipped
        if node_key not in annotation:
            continue
        for label, coordinate in annotation[node_key].items():
            # keep only the digits of the annotation name as its number
            num = re.sub(r'\D', "", label)
            operate(f_p, f_i, node_key, num, coordinate, haved_img, classify)

def cut_img(path, annotation):
    """Cut all annotated patches for the dataset rooted at ``path``.

    Reads slides from ``<path>/patient`` and writes results beneath
    ``<path>/image``.

    Args:
        path: Dataset root folder.
        annotation: Mapping from node key to ``{annotation name: coordinates}``.
    """
    patient_dir = os.path.join(path, "patient")
    image_dir = os.path.join(path, "image")

    # prepare the output tree before anything is written
    need_folder(patient_dir, image_dir)
    create_store(patient_dir, image_dir)

    slides = img_have(patient_dir, image_dir)
    stages = handle_c(path)
    generate_img(slides, annotation, patient_dir, image_dir, stages)

if __name__ == "__main__":
    # Script entry point: the dataset root is the relative folder "17".
    path = "17"
    # Parse (or load the cached) annotations, then cut the patches.
    cut_img(path, read_rol_to_dist(path))
    print("完成skr~skr~")

文章作者: Jerry
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC 4.0 许可协议。转载请注明来源 Jerry !
 上一篇
工程小知识 工程小知识
main loop把主循环(main loop)简单的谈一下。GUI应用程序都是事件驱动的。比如键盘事件、鼠标事件等等。还有一些事件来自于系统内部,比如定时事件、其它文件事件等等。在没有任何事件的情况下,应用程序处于睡眠状态。这种事件驱动机
2020-06-06
下一篇 
《统计学习方法》李航老师 《统计学习方法》李航老师
前言 理论学习: 学习博客: Dodo 作者Github: 算法 调包快速实现: 工具: scikit-learn 兴趣才是学习动力 可视化:机器学习 札记 感知机 (perceptron) 随机梯度下降 逻辑斯蒂 (log
  目录