def __init__(self, hdf_filename, test_pct=0.25, neg_bias=0.5, batch_size=64, normalize=False, malignancy_to_class=None, window_normalize=False):
    """Open the nodule HDF5 file and build shuffled train/test index lists.

    The class labels are read from the ``nodule_classes`` dataset, optionally
    remapped from the ``nodule_malignancy`` dataset, and then a subset of
    example indices is sampled so that roughly ``neg_bias`` of them are
    negatives (label ``0``).  The sampled indices are shuffled and split into
    ``self._test_indices`` and ``self._train_indices``.

    The HDF5 file is opened read-only and the handle is kept on
    ``self._hdf_file``; it is not closed here.

    Args:
        hdf_filename: Path handed to ``h5py.File``.
        test_pct: Fraction of the sampled examples assigned to the test split.
        neg_bias: Target fraction of negatives among the sampled examples.
            ``None`` is treated as ``0.5``.
        batch_size: Stored on the instance; not used by the constructor.
        normalize: When true, handles to the ``nodule_pixel_min`` and
            ``nodule_pixel_max`` datasets are stored on ``self._Xmin`` /
            ``self._Xmax``.
        malignancy_to_class: Optional 6-entry sequence indexed by malignancy
            level (0 - 5).  When given, each label is replaced by
            ``[malignancy_to_class[level]]``.
        window_normalize: Stored on the instance; not used by the constructor.

    Raises:
        ValueError: If ``malignancy_to_class`` is given but does not contain
            exactly 6 entries.
    """
    neg_bias = 0.5 if neg_bias is None else neg_bias

    self._hdf_filename = hdf_filename
    self._neg_bias = neg_bias
    self._test_pct = test_pct
    self._batch_size = batch_size
    self._test_location = 0
    self._train_location = 0
    self._test_indices = []
    self._train_indices = []
    self._malignancy_to_class = malignancy_to_class
    self._normalize = normalize
    self._Xmin = None
    self._Xmax = None
    self._window_normalize = window_normalize

    # Validate before touching the file so a bad mapping fails fast.
    if malignancy_to_class is not None and len(malignancy_to_class) != 6:
        raise ValueError("malignancy_class mapping must contain exactly 6 values, one for each malignancy level 0 - 5")

    # Open the hdf file
    self._hdf_file = h5py.File(self._hdf_filename, 'r')

    # Get info on classes and makeup of the dataset by examining the y values
    # (classes).  ``dataset[()]`` reads the whole dataset into memory; the
    # older ``dataset.value`` spelling no longer exists in h5py >= 3.0.
    y = self._hdf_file['nodule_classes'][()]

    if malignancy_to_class is not None:
        # Read the malignancy levels once instead of hitting the HDF5
        # dataset on every loop iteration.
        mal = self._hdf_file['nodule_malignancy'][()]
        for i in range(len(y)):
            y[i] = [malignancy_to_class[int(mal[i])]]

    if self._normalize:
        # These are dataset handles, not in-memory copies.
        self._Xmin = self._hdf_file['nodule_pixel_min']
        self._Xmax = self._hdf_file['nodule_pixel_max']

    n_examples = len(y)
    negatives = [i for i in range(n_examples) if y[i] == [0]]
    positives = [i for i in range(n_examples) if y[i][0] > 0]
    neg_count = len(negatives)
    pos_count = len(positives)

    # Only examples that are clearly negative or positive take part.
    n_examples = neg_count + pos_count
    neg_goal = int(min(neg_count, round(neg_bias * n_examples)))
    pos_goal = n_examples - neg_goal
    if pos_goal > pos_count:
        # Not enough positives to fill the remainder: keep every positive
        # and shrink the negative quota instead.
        # NOTE(review): this factor is 1.0 only when neg_bias == 0.5; for
        # other values it does not produce a neg_bias fraction of
        # negatives - confirm the intended formula.
        neg_goal = int(round(pos_count * (1 - neg_bias + 0.5)))
        pos_goal = pos_count

    # Randomly choose neg_goal negatives and pos_goal positives (capped at
    # what is actually available), without replacement.
    selected_indices = list(np.random.choice(negatives, size=min(neg_count, neg_goal), replace=False))
    selected_indices.extend(np.random.choice(positives, size=min(pos_count, pos_goal), replace=False))

    n_examples = len(selected_indices)
    np.random.shuffle(selected_indices)

    # The first test_pct of the shuffled indices become the test split.
    test_examples = int(round(test_pct * n_examples))
    self._test_indices = selected_indices[0:test_examples]
    self._train_indices = selected_indices[test_examples:]
load_tumor_image_data.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录