def crop_and_store(frame, mouth_coordinates, name):
    """
    Args:
        1. frame: The frame which has to be cropped.
        2. mouth_coordinates: The coordinates which help in deciding which region is to be cropped.
        3. name: The path name to be used for storing the cropped image.
    """
    # Find bounding rectangle for mouth coordinates
    x, y, w, h = cv2.boundingRect(mouth_coordinates)
    mouth_roi = frame[y:y + h, x:x + w]

    h, w, channels = mouth_roi.shape
    # If the cropped region is very small, ignore this case.
    if h < 10 or w < 10:
        return

    resized = resize(mouth_roi, 32, 32)
    cv2.imwrite(name, resized)
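A minimal usage sketch, assuming mouth_coordinates is an N x 2 int32 array of landmark points (for example the mouth indices of a dlib 68-point predictor) and standing in for the project's own resize() helper with cv2.resize; all names below are illustrative:

import cv2
import numpy as np

def resize(img, w, h):
    # stand-in for the project's resize() helper
    return cv2.resize(img, (w, h))

frame = cv2.imread('speaker_frame.jpg')          # hypothetical input frame
mouth_coordinates = np.array([[210, 340], [260, 338],
                              [255, 365], [215, 368]], dtype=np.int32)
crop_and_store(frame, mouth_coordinates, 'mouth_0001.png')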
Source: data_preprocessing_autoencoder.py (project: AVSR-Deep-Speech, author: pandeydivesh15)
def test_image(addr):
    target = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
    font = cv2.FONT_HERSHEY_SIMPLEX
    im = cv2.imread(addr)
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray, scaleFactor=1.1)
    for (x, y, w, h) in faces:
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        face_crop = im[y:y + h, x:x + w]
        face_crop = cv2.resize(face_crop, (48, 48))
        face_crop = cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY)
        face_crop = face_crop.astype('float32') / 255
        face_crop = np.asarray(face_crop)
        face_crop = face_crop.reshape(1, 1, face_crop.shape[0], face_crop.shape[1])
        result = target[np.argmax(model.predict(face_crop))]
        cv2.putText(im, result, (x, y), font, 1, (200, 0, 0), 3, cv2.LINE_AA)
    cv2.imshow('result', im)
    cv2.imwrite('result.jpg', im)
    cv2.waitKey(0)
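A minimal sketch of the globals this function relies on, assuming faceCascade is an OpenCV Haar cascade and model is a Keras CNN trained on 48x48 grayscale emotion crops; the file names are illustrative:

import cv2
# from keras.models import load_model

faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# model = load_model('emotion_cnn.h5')   # assumed checkpoint name
test_image('group_photo.jpg')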
def preprocess(image):
    """Takes an image and applies preprocessing."""
    # Resize to the network input size
    image = cv2.resize(image, (data_shape, data_shape))
    # Swap BGR to RGB
    image = image[:, :, (2, 1, 0)]
    # Convert to float before subtracting the mean
    image = image.astype(np.float32)
    # Subtract the mean pixel values
    image -= np.array([123, 117, 104])
    # Transpose to [batch-channel-height-width]
    image = np.transpose(image, (2, 0, 1))
    image = image[np.newaxis, :]
    # Convert to an MXNet NDArray
    image = nd.array(image)
    return image
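A minimal usage sketch, assuming data_shape is a module-level global (for example 512 for an SSD-style detector) and that a loaded MXNet network net consumes the resulting batch; both names are assumptions:

import cv2
import numpy as np
from mxnet import nd

data_shape = 512                      # assumed global used by preprocess()
img = cv2.imread('example.jpg')       # BGR uint8 image from disk
x = preprocess(img)                   # 1 x 3 x data_shape x data_shape NDArray
# y = net(x)                          # forward pass, assuming `net` is already loaded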
def videoize(func, args, src=0, win_name="Cam", delim_wait=1, delim_key=27):
    cap = cv2.VideoCapture(src)
    while True:
        ret, frame = cap.read()
        # To speed up processing; almost real-time on my PC
        frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)
        frame = cv2.flip(frame, 1)
        out = func(frame, args)
        if out is None:
            continue
        out = cv2.resize(out, dsize=None, fx=1.4, fy=1.4)
        cv2.imshow(win_name, out)
        # Center the window on screen (s_w and s_h are the screen width/height globals)
        cv2.moveWindow(win_name, (s_w - out.shape[1]) // 2, (s_h - out.shape[0]) // 2)
        k = cv2.waitKey(delim_wait)
        if k == delim_key:
            cv2.destroyAllWindows()
            cap.release()
            return
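A minimal usage sketch; s_w and s_h (screen width and height) are assumed module-level globals in the original project, so they are defined explicitly here:

s_w, s_h = 1920, 1080                 # assumed screen resolution

def passthrough(frame, args):
    # trivial processing callback: return the frame unchanged
    return frame

videoize(passthrough, None)           # opens camera 0; press Esc (key 27) to quit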
def add_text(img, text, text_top, image_scale):
    """
    Args:
        img (numpy array of shape (width, height, 3)): input image
        text (str): text to add to image
        text_top (int): location of top text to add
        image_scale (float): image resize scale

    Summary:
        Add display text to a frame.

    Returns:
        Next available location of top text (allows for chaining this function)
    """
    cv2.putText(
        img=img,
        text=text,
        org=(0, text_top),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.15 * image_scale,
        color=(255, 255, 255))
    return text_top + int(5 * image_scale)
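A short chaining sketch, using an illustrative blank frame and scale:

import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)   # hypothetical frame
image_scale = 4.0
text_top = int(5 * image_scale)
text_top = add_text(frame, 'episode: 12', text_top, image_scale)
text_top = add_text(frame, 'reward: 3.0', text_top, image_scale)   # each call returns the next y position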
Source: logoPredictor.py (project: vehicle_brand_classification_CNN, author: nanoc812)
def loadImgs(imgsfolder, rows, cols):
    myfiles = glob.glob(imgsfolder + '*.jpg')
    nPics = len(myfiles)
    X = np.zeros((nPics, rows, cols), dtype='uint8')
    i = 0
    imgNames = []
    for filepath in myfiles:
        sd = filepath.rfind('/')
        ed = filepath.rfind('.')
        filename = filepath[sd + 1:ed]
        imgNames.append(filename)
        temp = cv2.imread(filepath, 0)
        if temp is None:
            continue
        elif temp.size < 1000:
            continue
        elif temp.shape == (rows, cols):
            X[i, :, :] = temp
        else:
            X[i, :, :] = cv2.resize(temp, (cols, rows), interpolation=cv2.INTER_CUBIC)
        i += 1
    return X, imgNames
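A small usage sketch with an illustrative folder path; note that, as written, the folder argument needs a trailing slash:

X, names = loadImgs('logos/', 64, 64)    # loads logos/*.jpg as 64x64 grayscale arrays
print(X.shape, len(names))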
def get_image_descriptor_for_image(image, model):
    im = cv2.resize(image, (224, 224)).astype(np.float32)
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        # Theano ordering: move channels first before the per-channel operations
        im = im.transpose((2, 0, 1))
        # 'RGB'->'BGR'
        im = im[::-1, :, :]
        # Zero-center by mean pixel
        im[0, :, :] -= 103.939
        im[1, :, :] -= 116.779
        im[2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        im = im[:, :, ::-1]
        # Zero-center by mean pixel
        im[:, :, 0] -= 103.939
        im[:, :, 1] -= 116.779
        im[:, :, 2] -= 123.68
        im = im.transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, [model.layers[33].output])
    return _convout1_f([0] + [im])
def downscale(old_file_name):
    img = cv2.imread(os.path.join(old_file_name))
    new_file_name = (old_file_name
                     .replace('training', 'training_' + str(min_size))
                     .replace('validation', 'validation_' + str(min_size))
                     .replace('testing', 'testing_' + str(min_size))
                     )
    height, width, _ = img.shape
    if width > height:
        new_width = int(1.0 * width / height * min_size)
        new_height = min_size
    else:
        new_height = int(1.0 * height / width * min_size)
        new_width = min_size
    img_new = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(new_file_name, img_new)
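A minimal sketch, assuming min_size is a module-level global (the target length of the shorter side) and that the mirrored output directory already exists; the path is illustrative:

min_size = 256                          # assumed global used by downscale()
downscale('training/cat_0001.jpg')      # writes training_256/cat_0001.jpg with the short side at 256 px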
def resize_image(self, img, *args):
    # unpacks height, width
    height, width, _ = img.shape
    print("Original size: {} {}".format(width, height))
    count_times_resized = 0
    while width > 500 or height > 500:
        # if width > 300 or height > 300:
        # divides the image's WxH by half
        width = width / 2
        height = height / 2
        count_times_resized += 1
    # prints how many times it was resized to the console
    if count_times_resized != 0:
        print("Resized {}x smaller, to: {} {}".format(2 ** count_times_resized, width, height))
    # makes sure the image is not TOO small
    if width < 300 and height < 300:
        width = width * 2
        height = height * 2
    img = cv2.resize(img, (int(width), int(height)))
    return img
def switch_camara(self):
    self.activo = not self.activo
    if self.activo:
        # Capture the first frame to record the image size and the resize factor
        ret, frame = self.cap.read(self.camera_id)
        self.activo = ret
        if ret:
            self.img_height, self.img_width, self.img_channels = frame.shape
            self.img_zoomx = 320.0 / self.img_width
            self.img_zoomy = 200.0 / self.img_height
            # We have the data now; capture the final image and keep the frame
            self.captura_frame()
        else:
            self.status = "No puedo encontrar la camara"
            print("No encuentro la camara!!!!")
def decorate_img_for_env(img, env_id, image_scale):
    """
    Args:
        img (numpy array of shape (width, height, 3)): input image
        env_id (str): the gym env id
        image_scale (float): a scale to resize the image

    Returns:
        an image

    Summary:
        Adds environment-specific image decorations. Currently used to make it easier to
        block/label in Pong.
    """
    if env_id is not None and 'Pong' in env_id:
        h, w, _ = img.shape
        est_catastrophe_y = h - 142
        est_block_clearance_y = est_catastrophe_y - int(20 * image_scale)
        # cv2.line(img, (0, est_catastrophe_y), (int(500 * image_scale), est_catastrophe_y), (0, 0, 255))
        cv2.line(img, (250, est_catastrophe_y), (int(500 * image_scale), est_catastrophe_y), (0, 255, 255))
        # cv2.line(img, (0, est_block_clearance_y), (int(500 * image_scale), est_block_clearance_y),
        #          (255, 0, 0))
    return img
def detect(self, img):
    img_h, img_w, _ = img.shape
    inputs = cv2.resize(img, (self.image_size, self.image_size))
    inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
    inputs = (inputs / 255.0) * 2.0 - 1.0
    inputs = np.reshape(inputs, (1, self.image_size, self.image_size, 3))
    result = self.detect_from_cvmat(inputs)[0]
    # Rescale box coordinates from the network input size back to the original image size
    for i in range(len(result)):
        result[i][1] *= (1.0 * img_w / self.image_size)
        result[i][2] *= (1.0 * img_h / self.image_size)
        result[i][3] *= (1.0 * img_w / self.image_size)
        result[i][4] *= (1.0 * img_h / self.image_size)
    return result
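A light usage sketch; detector stands for an instance of the surrounding class, and the entries at positions 1 through 4 of each result are the rescaled box coordinates (the layout of the remaining fields depends on the project):

img = cv2.imread('street.jpg')            # hypothetical test image
results = detector.detect(img)
for r in results:
    print(r)                              # r[1:5] are already in original-image pixel coordinates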
def loadLogoSet(path, rows, cols, test_data_rate=0.15):
    random.seed(612)
    _, imgID = readItems('data.txt')
    y, _ = modelDict(path)
    nPics = len(y)
    faceassset = np.zeros((nPics, rows, cols), dtype=np.uint8)  # gray images
    noImg = []
    for i in range(nPics):
        temp = cv2.imread(path + 'logo/' + imgID[i] + '.jpg', 0)
        if temp is None:
            noImg.append(i)
        elif temp.size < 1000:
            noImg.append(i)
        else:
            temp = cv2.resize(temp, (cols, rows), interpolation=cv2.INTER_CUBIC)
            faceassset[i, :, :] = temp
    y = np.delete(y, noImg, 0)
    faceassset = np.delete(faceassset, noImg, 0)
    nPics = len(y)
    index = random.sample(range(nPics), int(nPics * test_data_rate))
    x_test = faceassset[index, :, :]
    x_train = np.delete(faceassset, index, 0)
    y_test = y[index]
    y_train = np.delete(y, index, 0)
    return (x_train, y_train), (x_test, y_test)
def crop(image, name, crop_size, padding_size):
    (height, width) = image.shape
    cropped_images = []
    for i in range(0, height, padding_size):
        for j in range(0, width, padding_size):
            box = (i, j, i + crop_size, j + crop_size)  # upper, left, lower, right
            cropped_name = name + '_' + str(i) + '_' + str(j) + '.jpg'
            cropped_image = image[i:i + crop_size, j:j + crop_size]
            resized_image = cv2.resize(cropped_image, (IMAGE_SIZE, IMAGE_SIZE))
            cropped_images.append(resized_image)
    return cropped_images
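A minimal usage sketch, assuming IMAGE_SIZE is a module-level constant used by crop(); the file name is illustrative:

IMAGE_SIZE = 64                                                       # assumed global
gray = cv2.imread('scan_page.png', 0)                                 # 2-D grayscale array
patches = crop(gray, 'scan_page', crop_size=128, padding_size=64)     # 128x128 tiles with stride 64, each resized to 64x64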
# (original Chinese comment garbled in the source; it describes cropping the images and references data_num)
def get_conv_image_descriptor_for_image(image, model):
    im = cv2.resize(image, (224, 224)).astype(np.float32)
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        # Theano ordering: move channels first before the per-channel operations
        im = im.transpose((2, 0, 1))
        # 'RGB'->'BGR'
        im = im[::-1, :, :]
        # Zero-center by mean pixel
        im[0, :, :] -= 103.939
        im[1, :, :] -= 116.779
        im[2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        im = im[:, :, ::-1]
        # Zero-center by mean pixel
        im[:, :, 0] -= 103.939
        im[:, :, 1] -= 116.779
        im[:, :, 2] -= 123.68
        im = im.transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, [model.layers[31].output])
    return _convout1_f([0] + [im])
def get_batch():
    ran = random.randint(600, data_size)
    # print(ran)
    image = []
    label = []
    label_0 = []
    n_pic = ran
    # print(n_pic)
    for i in range(batch_size * n_steps):
        frame_0 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic + i), 0)
        frame_0 = cv2.resize(frame_0, (LONGITUDE, LONGITUDE))
        frame_0 = np.array(frame_0).reshape(-1)
        image.append(frame_0)
        # print(np.shape(image))
    for i in range(batch_size):
        frame_1 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic + batch_size * (i + 1)), 0)
        frame_1 = cv2.resize(frame_1, (LONGITUDE, LONGITUDE))
        frame_1 = np.array(frame_1).reshape(-1)
        label.append(frame_1)
    for i in range(batch_size):
        frame_2 = cv2.imread('./cropedoriginalUS2/%d.jpg' % (n_pic + batch_size * (i + 1)), 0)
        frame_2 = cv2.resize(frame_2, (LONGITUDE, LONGITUDE))
        frame_2 = np.array(frame_2).reshape(-1)
        label_0.append(frame_2)
    return image, label, label_0
def get_batch(batch_size=20, data_size=6498):
    ran = np.random.choice(data_size, batch_size, replace=False)
    image = []
    outline = []
    for i in range(batch_size):
        n_pic = ran[i]
        # print(n_pic)
        frame_0 = cv2.imread('./cropPicY/%d.jpg' % n_pic, 0)
        frame_0 = cv2.resize(frame_0, (24, 24))
        frame_0 = np.array(frame_0).reshape(-1)
        # print('np', frame_0)
        # frame_0 = gray2binary(frame_0)
        # print(frame_0)
        frame_1 = cv2.imread('./cropPicX/%d.jpg' % n_pic, 0)
        frame_1 = cv2.resize(frame_1, (24, 24))
        frame_1 = np.array(frame_1).reshape(-1)
        frame_1 = gray2binary(frame_1)
        image.append(frame_0)
        outline.append(frame_1)
    # print(image)
    return np.array(image), np.array(outline)
def get_train_batch(noise=0):
    ran = random.randint(600, data_size)
    # print(ran)
    image = []
    label = []
    label_0 = []
    n_pic = ran
    # print(n_pic)
    for i in range(batch_size):
        frame_0 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic + i), 0)
        frame_0 = add_noise(frame_0, n=noise)
        frame_0 = cv2.resize(frame_0, (LONGITUDE, LONGITUDE))
        frame_0 = np.array(frame_0).reshape(-1)
        image.append(frame_0)
        # print(np.shape(image))
    for i in range(batch_size):
        frame_1 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic + batch_size * (i + 1)), 0)
        frame_1 = cv2.resize(frame_1, (LONGITUDE, LONGITUDE))
        frame_1 = np.array(frame_1).reshape(-1)
        label.append(frame_1)
    return image, label
def get_batch(batch_size=20, data_size=6498):
    ran = np.random.choice(data_size, batch_size, replace=False)
    image = []
    for i in range(batch_size):
        n_pic = ran[i]
        # print(n_pic)
        frame_0 = cv2.imread('./cropPicX/%d.jpg' % n_pic, 0)
        frame_0 = cv2.resize(frame_0, (24, 24))
        frame_0 = np.array(frame_0).reshape(-1)
        image.append(frame_0)
    # print(image)
    return np.array(image)

# Visualize decoder setting
# Parameters
Source: 1.1.1autoencoder_self.py (project: US-image-prediction, author: ChengruiWu008)
def get_batch(batch_size=20, data_size=6498):
    ran = np.random.choice(data_size, batch_size, replace=False)
    image = []
    outline = []
    for i in range(batch_size):
        n_pic = ran[i]
        # print(n_pic)
        frame_0 = cv2.imread('./easyPixelImage2/%d.jpg' % n_pic, 0)
        frame_0 = cv2.resize(frame_0, (24, 24))
        frame_0 = np.array(frame_0).reshape(-1)
        # print('np', frame_0)
        # frame_0 = gray2binary(frame_0)
        # print(frame_0)
        frame_1 = cv2.imread('./easyPixelImage2/%d.jpg' % n_pic, 0)
        frame_1 = cv2.resize(frame_1, (24, 24))
        frame_1 = np.array(frame_1).reshape(-1)
        frame_1 = gray2binary(frame_1)
        image.append(frame_0)
        outline.append(frame_1)
    # print(image)
    return np.array(image), np.array(outline)
def get_train_batch(noise=500):
    ran = np.random.randint(600, 5800, size=10, dtype='int')
    # print(ran)
    image = []
    label = []
    label_0 = []
    n_pic = ran
    # print(n_pic)
    for i in range(10):
        frame_0 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic[i]), 0)
        frame_0 = add_noise(frame_0, n=noise)
        frame_0 = cv2.resize(frame_0, (24, 24))
        frame_0 = np.array(frame_0).reshape(-1)
        frame_0 = frame_0 / 255.0
        image.append(frame_0)
        # print(np.shape(image))
    for i in range(10):
        frame_1 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic[i]), 0)
        frame_1 = cv2.resize(frame_1, (24, 24))
        frame_1 = np.array(frame_1).reshape(-1)
        frame_1 = gray2binary(frame_1)
        label.append(frame_1)
    return np.array(image, dtype='float'), np.array(label, dtype='float')
def get_test_batch(noise=500):
    ran = np.random.randint(5800, 6000, size=10, dtype='int')
    # print(ran)
    image = []
    label = []
    label_0 = []
    n_pic = ran
    # print(n_pic)
    for i in range(10):
        frame_0 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic[i]), 0)
        frame_0 = add_noise(frame_0, n=noise)
        frame_0 = cv2.resize(frame_0, (24, 24))
        frame_0 = np.array(frame_0).reshape(-1)
        frame_0 = frame_0 / 255.0
        image.append(frame_0)
        # print(np.shape(image))
    for i in range(10):
        frame_1 = cv2.imread('./cropedoriginalPixel2/%d.jpg' % (n_pic[i]), 0)
        frame_1 = cv2.resize(frame_1, (24, 24))
        frame_1 = np.array(frame_1).reshape(-1)
        frame_1 = gray2binary(frame_1)
        label.append(frame_1)
    return np.array(image, dtype='float'), np.array(label, dtype='float')
def get_data(datadir):
    # datadir = args.data
    # assume each image is 512x256, split into left and right halves
    imgs = glob.glob(os.path.join(datadir, '*.jpg'))
    data_X = np.zeros((len(imgs), 3, img_cols, img_rows))
    data_Y = np.zeros((len(imgs), 3, img_cols, img_rows))
    i = 0
    for file in imgs:
        img = cv2.imread(file, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (img_cols * 2, img_rows))
        # print('{} {},{}'.format(i, np.shape(img)[0], np.shape(img)[1]))
        img = np.swapaxes(img, 0, 2)
        X, Y = split_input(img)
        data_X[i, :, :, :] = X
        data_Y[i, :, :, :] = Y
        i = i + 1
    return data_X, data_Y
def display(self, frame, face_locations):
    """
    - Display results on screen with bboxes
    :param frame: window frame
    :return: window with resulting predictions on faces
    """
    # Display the results
    scale = 1
    if self.resize:
        scale = 4
    if not len(face_locations) == 0:  # only draw if something was detected
        for (top, right, bottom, left) in face_locations:
            # Scale back up face locations since the frame we detected in was scaled to 1/4 size
            top *= scale
            right *= scale
            bottom *= scale
            left *= scale
            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 255), 2)
    # else
def get_batch_idx(self, idx):
    hh = self.inp_height
    ww = self.inp_width
    x = np.zeros([len(idx), hh, ww, 3], dtype='float32')
    orig_height = []
    orig_width = []
    ids = []
    for kk, ii in enumerate(idx):
        fname = self.ids[ii]
        ids.append('{:06}'.format(ii))
        x_ = cv2.imread(fname).astype('float32') / 255
        x[kk] = cv2.resize(
            x_, (self.inp_width, self.inp_height),
            interpolation=cv2.INTER_CUBIC)
        orig_height.append(x_.shape[0])
        orig_width.append(x_.shape[1])
    return {
        'x': x,
        'orig_height': np.array(orig_height),
        'orig_width': np.array(orig_width),
        'id': ids
    }
def RandomCrop(rand_seed, img, top, left, height=224, width=224, u=0.5, aug_factor=9/8):
    # First zoom in by a factor of aug_factor on the input img, then random-crop to (height, width)
    # if rand_seed < u:
    if 1:
        # h, w, c = img.shape
        # img = cv2.resize(img, (round(aug_factor*w), round(aug_factor*h)), interpolation=cv2.INTER_LINEAR)
        # h, w, c = img.shape
        new_h, new_w = height, width
        # top = np.random.randint(0, h - new_h)
        # left = np.random.randint(0, w - new_w)
        img = img[top: top + new_h,
                  left: left + new_w]
    return img
def __init__(self, dpt, fx, fy, importer=None, refineNet=None):
    """
    Constructor
    :param dpt: depth image
    :param fx: camera focal length
    :param fy: camera focal length
    """
    self.dpt = dpt
    self.maxDepth = min(1500, dpt.max())
    self.minDepth = max(10, dpt.min())
    # set values out of range to 0
    self.dpt[self.dpt > self.maxDepth] = 0.
    self.dpt[self.dpt < self.minDepth] = 0.
    # camera settings
    self.fx = fx
    self.fy = fy
    # Optional refinement of CoM
    self.refineNet = refineNet
    self.importer = importer
    # depth resize method
    self.resizeMethod = self.RESIZE_CV2_NN
def resizeCrop(self, crop, sz):
    """
    Resize cropped image
    :param crop: crop
    :param sz: size
    :return: resized image
    """
    if self.resizeMethod == self.RESIZE_CV2_NN:
        rz = cv2.resize(crop, sz, interpolation=cv2.INTER_NEAREST)
    elif self.resizeMethod == self.RESIZE_BILINEAR:
        rz = self.bilinearResize(crop, sz, self.getNDValue())
    elif self.resizeMethod == self.RESIZE_CV2_LINEAR:
        rz = cv2.resize(crop, sz, interpolation=cv2.INTER_LINEAR)
    else:
        raise NotImplementedError("Unknown resize method!")
    return rz
def get_whole_rotated_image(crop, mask, angle, crop_size, before_rotate_size, scale):
    # Better for larger crops:
    # pixels_to_jitter = 35 * scale
    # For dates:
    pixels_to_jitter = 4  # old way
    center_x = before_rotate_size / 2 + (random.random() * pixels_to_jitter * 2) - pixels_to_jitter
    center_y = before_rotate_size / 2 + (random.random() * pixels_to_jitter * 2) - pixels_to_jitter
    rot_image = crop.copy()
    rot_image = rotate(rot_image, angle, center_x, center_y, before_rotate_size, before_rotate_size)
    # This is hard-coded for a 28x28 output: resize to 41x41, then take the central 28x28 window.
    rot_image = cv2.resize(rot_image, (41, 41), interpolation=cv2.INTER_AREA)
    rot_image = rot_image[6:34, 6:34]
    # rot_image = rot_image * mask
    return rot_image
def resize(im, target_size, max_size):
    """
    Resize the input image to the target size and return the scale.
    :param im: BGR image read by OpenCV
    :param target_size: one-dimensional size (the short side)
    :param max_size: one-dimensional max size (the long side)
    :return: resized image and the scale factor applied
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent the bigger axis from exceeding max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return im, im_scale
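A typical detection-pipeline usage, with illustrative numbers (short side 600, long side capped at 1000, as in many Faster R-CNN style configurations):

im = cv2.imread('example.jpg')
im_resized, im_scale = resize(im, target_size=600, max_size=1000)
# any ground-truth boxes would be multiplied by im_scale to stay aligned with im_resized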