进入ModelArts
点击如下链接:https://www.huaweicloud.com/product/modelarts.html , 进入ModelArts主页。点击“立即使用”按钮,输入用户名和密码登录,进入ModelArts使用页面。
创建ModelArts notebook
下面,我们在ModelArts中创建一个notebook开发环境。ModelArts notebook提供网页版的Python开发环境,可以方便地编写、运行代码,并查看运行结果。
第一步:在ModelArts服务主界面依次点击“开发环境”、“创建”
第二步:填写notebook所需的参数:
第三步:配置好notebook参数后,点击下一步,进入notebook信息预览。确认无误后,点击“立即创建”
第四步:创建完成后,返回开发环境主界面,等待Notebook创建完毕后,打开Notebook,进行下一步操作。
在ModelArts中创建开发环境
接下来,我们创建一个实际的开发环境,用于后续的实验步骤。
第一步:点击下图所示的“启动”按钮,待加载完成、“打开”按钮从灰色变为蓝色后,点击“打开”进入刚刚创建的Notebook
第二步:创建一个Python3环境的Notebook。点击右上角的"New",然后选择TensorFlow 1.13.1开发环境。
第三步:点击左上方的文件名"Untitled",并输入一个与本实验相关的名称,如"age_prediction"
在Notebook中编写并执行代码
在Notebook中,我们输入一个简单的打印语句,然后点击上方的运行按钮,可以查看语句执行的结果:
开发环境准备好啦,接下来可以愉快地写代码啦!
案例内容
MTCNN模型简介
MTCNN(Multi-task convolutional neural network) 中文名称是多任务卷积神经网络,可以用来做人脸区域检测和人脸对齐。在人脸检测中会面临很多不同的问题:遮挡,角度倾斜等。传统方法中,大多使用机器学习的方法,而在MTCNN中,使用深度学习方法结合NMS和边界框回归,将人脸区域坐标和关键点坐标进行识别,相比较机器学习方法,MTCNN能更好地识别不同情况下的人脸。
MTCNN模型的详解可以参考:https://kpzhang93.github.io/MTCNN_face_detection_alignment 。
import os
from modelarts.session import Session
# Fetch and unpack the resource archive (model code and weights) for this lab.
# Each supported ModelArts region keeps the archive under its own bucket path.
_BUCKET_PATHS = {
    'cn-north-1': "modelarts-labs/notebook/DL_face_age_prediction/ssr.tar.gz",
    'cn-north-4': "modelarts-labs-bj4/notebook/DL_face_age_prediction/ssr.tar.gz",
}

sess = Session()
# bucket_path is None for an unsupported region. Previously it was left unbound
# there, so the download call below raised NameError right after the hint.
bucket_path = _BUCKET_PATHS.get(sess.region_name)
if bucket_path is None:
    print("请更换地区到北京一或北京四")
elif not os.path.exists("./src/align"):
    # Only download when the unpacked sources are not already present.
    sess.download_data(bucket_path=bucket_path, path="./ssr.tar.gz")

if os.path.exists('./ssr.tar.gz'):
    # Unpack the resource archive with tar
    os.system("tar -xf ./ssr.tar.gz")
    # Remove the archive afterwards
    os.system("rm ./ssr.tar.gz")
!pip install mtcnn==0.0.8
!pip install numpy==1.16.2
import numpy as np
import cv2
import tensorflow as tf
import random
from PIL import Image
这里我们提供了一张测试图片,你也可以上传自己的图片进行测试:通过notebook的upload功能上传测试图片,并将`image_path`改为图片路径即可。
# Path of the picture to analyse; point it at an uploaded file to try your own.
image_path = "./test.jpg"
# Decode the picture and keep it as a NumPy array.
img = np.array(Image.open(image_path))
调用mtcnn库,进行人脸区域检测,并显示检测结果
# Detect faces in `img` with the third-party mtcnn package.
from mtcnn.mtcnn import MTCNN as mtcnn
detector = mtcnn()
detected = detector.detect_faces(img)
# Echo the detection result (a bare expression, so a notebook shows it as cell output)
detected
将检测结果绘制在图片上
# Drawing: annotate the first detected face (box plus five landmarks) on the image.
if not detected:
    # Nothing was detected, so detected[0] would raise IndexError.
    # Show the unmodified picture instead of crashing.
    print("No face detected; nothing to draw.")
    res_img = Image.fromarray(img)
else:
    box = detected[0]["box"]
    # box is used as (x, y, width, height): the opposite corner is (x + w, y + h).
    res_img = cv2.rectangle(img, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), 0, 1)
    keypoints = detected[0]["keypoints"]
    # One dot per landmark, drawn in the same order as before.
    for landmark in ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right'):
        res_img = cv2.circle(res_img, keypoints[landmark], 1, 255, 4)
    res_img = Image.fromarray(res_img)
res_img
MTCNN模型实现
MTCNN 流程总览
MTCNN网络: MTCNN网络分为三部分:PNet RNet ONet
卷积网络生成3部分结果:人脸/非人脸分类结果,人脸边界框以及人脸关键点位置。
数据依次经过PNet,RNet和ONet,每经过一组网络,就进行一次nms和边界框回归,最后在ONet网络输出中获得检测结果,人脸区域坐标及人脸关键点坐标。
NMS(non maximum suppression)非极大值抑制 当我们进行人脸检测时,可能会对同一张人脸区域有多个边界框检测结果,虽然这些检测结果都有很高的置信度,但是我们只需要置信度最高的检测结果,所以进行局部最大值检测,将不是最大值的预测结果去掉,完成边界框筛选的任务。NMS被应用在很多目标检测模型当中,例如R-CNN,Faster R-CNN,Mask R-CNN等。
接下来,我们使用代码搭建MTCNN神经网络结构。
我们将MTCNN的实现分为PNet,RNet,ONet的顺序进行讲解,每一部分包括模型的结构以及运行的效果。
from src.align.detect_face import Network
from src.align.detect_face import rerec, pad
from src.align.detect_face import nms
from src.align.detect_face import imresample
from src.align.detect_face import generateBoundingBox
class PNet(Network):
    """First MTCNN stage (PNet): convolutions, PReLU and one max-pool, no fc layer.

    Two heads branch off the ``PReLU3`` activation:

    * ``conv4-1`` (2 channels) followed by the ``prob1`` softmax, read later as
      the face / non-face classification result;
    * ``conv4-2`` (4 channels), read later as the bounding-box regression result.

    Layer names are kept exactly as written because the weights are loaded by
    name from ``PNet.npy``.
    """

    def setup(self):
        """Declare the layer graph on top of the ``data`` input."""
        # conv(...) positional arguments are presumably
        # (kernel_h, kernel_w, out_channels, stride_h, stride_w) - confirm in Network.
        # Trunk + classification head.
        (self.feed('data')
             .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
             .prelu(name='PReLU1')
             .max_pool(2, 2, 2, 2, name='pool1')
             .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
             .prelu(name='PReLU2')
             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
             .prelu(name='PReLU3')
             .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
             .softmax(3, name='prob1'))

        # Box-regression head, branching from the shared PReLU3 output.
        (self.feed('PReLU3')
             .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
class RNet(Network):
    """Second MTCNN stage (RNet): three conv/PReLU groups followed by fc layers.

    Two heads branch off the ``prelu4`` activation:

    * ``conv5-1`` (2 units) followed by the ``prob1`` softmax;
    * ``conv5-2`` (4 units), which the notebook reads back together with
      ``prob1`` when it runs this network.

    Layer names are kept exactly as written because the weights are loaded by
    name from ``RNet.npy``.
    """

    def setup(self):
        """Declare the layer graph on top of the ``data`` input."""
        # Trunk + classification head.
        (self.feed('data')  # pylint: disable=no-value-for-parameter, no-member
             .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
             .prelu(name='prelu1')
             .max_pool(3, 3, 2, 2, name='pool1')
             .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
             .prelu(name='prelu2')
             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
             .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
             .prelu(name='prelu3')
             .fc(128, relu=False, name='conv4')
             .prelu(name='prelu4')
             .fc(2, relu=False, name='conv5-1')
             .softmax(1, name='prob1'))

        # Second head, branching from the shared prelu4 output.
        (self.feed('prelu4')  # pylint: disable=no-value-for-parameter
             .fc(4, relu=False, name='conv5-2'))
class ONet(Network):
    """Third MTCNN stage (ONet): four conv/PReLU groups followed by fc layers.

    Three heads branch off the ``prelu5`` activation:

    * ``conv6-1`` (2 units) followed by the ``prob1`` softmax, read later as
      the face confidence;
    * ``conv6-2`` (4 units), read later as the bounding-box prediction;
    * ``conv6-3`` (10 units), read later as the facial landmark prediction.

    Layer names are kept exactly as written because the weights are loaded by
    name from ``ONet.npy``.
    """

    def setup(self):
        """Declare the layer graph on top of the ``data`` input."""
        # Trunk + classification head.
        (self.feed('data')  # pylint: disable=no-value-for-parameter, no-member
             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
             .prelu(name='prelu1')
             .max_pool(3, 3, 2, 2, name='pool1')
             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
             .prelu(name='prelu2')
             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
             .prelu(name='prelu3')
             .max_pool(2, 2, 2, 2, name='pool3')
             .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
             .prelu(name='prelu4')
             .fc(256, relu=False, name='conv5')
             .prelu(name='prelu5')
             .fc(2, relu=False, name='conv6-1')
             .softmax(1, name='prob1'))

        # Bounding-box head.
        (self.feed('prelu5')  # pylint: disable=no-value-for-parameter
             .fc(4, relu=False, name='conv6-2'))

        # Landmark head.
        (self.feed('prelu5')  # pylint: disable=no-value-for-parameter
             .fc(10, relu=False, name='conv6-3'))
# Open the original picture
test_img = Image.open(image_path)
test_img
# Pre-processing: convert to an array and derive the list of pyramid scales.
test_img = np.array(test_img)
img_size = np.asarray(test_img.shape)[0:2]
h, w = test_img.shape[0], test_img.shape[1]  # image height and width (410 and 599 per the original note)

# Accumulators reused by the detection stages that follow.
total_boxes = np.empty((0, 9))
points = np.empty(0)

minsize = 20
factor = 0.709
m = 12.0 / minsize              # 12 / 20
minl = np.amin([h, w]) * m      # shorter image side, rescaled by m

# Keep adding scales until the rescaled shorter side drops below 12.
scales = []
factor_count = 0
while minl >= 12:
    scales.append(m * np.power(factor, factor_count))
    minl *= factor
    factor_count += 1
# First stage: resize the picture to a pyramid scale and evaluate PNet on it.
# NOTE(review): indentation was lost in this copy of the notebook. The statements down to
# `img_y = ...` are presumably the loop body; whether the graph/session part below also
# sits inside the loop cannot be determined from here - confirm against the notebook.
for scale in scales:
hs=int(np.ceil(h*scale)) # smallest integer greater than or equal to the value
ws=int(np.ceil(w*scale))
im_data = cv2.resize(test_img, (ws, hs), interpolation=cv2.INTER_AREA)
# Centre and scale the pixel values (0.0078125 == 1/128)
im_data = (im_data-127.5)*0.0078125
# Add a leading axis, then swap axes 1 and 2 before feeding the network
img_x = np.expand_dims(im_data, 0)
img_y = np.transpose(img_x, (0,2,1,3))
# Build PNet in a fresh graph/session, load its weights and fetch both outputs by name
with tf.Graph().as_default():
with tf.Session() as sess:
with tf.variable_scope('pnet'):
data = tf.placeholder(tf.float32, shape=(None, None, None, 3), name="input")
pnet = PNet({'data':data})
pnet.load("./src/align/PNet.npy", sess)
out = sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img_y})
# Bounding-box regression output (axes 1 and 2 swapped back)
out0 = np.transpose(out[0], (0,2,1,3))
# Face classification output (axes 1 and 2 swapped back)
out1 = np.transpose(out[1], (0,2,1,3))
threshold = 0.5
# Channel 1 of the classification output is passed as the score map
boxes, reg = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold)
print("PNet产生结果为:"+str(boxes.shape))
total_boxes = boxes.copy()
# Bounding-box drawing helper
def draw_bboxes(img, total_boxes):
    """Draw every box in *total_boxes* on *img*, each in its own random colour.

    Args:
        img: Image array handed to ``cv2.rectangle``.
        total_boxes: 2-D sequence with one box per row. The first four values
            of a row are read as x1, y1, x2, y2 and converted with ``int()``;
            any further columns are ignored.

    Returns:
        The value returned by the last ``cv2.rectangle`` call, or *img* itself
        when *total_boxes* has no rows.
    """
    for row in total_boxes:
        # Draw the colour channels in a fixed r, g, b order so a seeded RNG
        # keeps producing the same colours.
        r = random.randint(0, 255)
        g = random.randint(0, 255)
        b = random.randint(0, 255)
        x1, y1, x2, y2 = (int(value) for value in row[:4])
        img = cv2.rectangle(img, (x1, y1), (x2, y2), (r, g, b), 2)
    return img
将PNet预测结果进行筛选和回归,结果绘制在图片上
# Reload the picture and overlay the current contents of total_boxes.
img = np.array(Image.open(image_path))
Image.fromarray(draw_bboxes(img, total_boxes))
# Reset the accumulator, then keep only the PNet boxes selected by NMS (threshold argument 0.7).
# NOTE(review): indentation was lost in this copy; presumably the two statements after the
# `if` form its body and the print runs unconditionally - confirm against the notebook.
total_boxes=np.empty((0,9))
pick = nms(boxes.copy(), 0.7, 'Union')
if boxes.size>0 and pick.size>0:
boxes = boxes[pick,:]
total_boxes = np.append(total_boxes, boxes, axis=0)
print("筛选之后结果为:"+str(total_boxes.shape))
# Reload a clean copy of the picture for drawing the filtered boxes.
img = np.array(Image.open(image_path))
# Second NMS pass over the accumulated boxes. The threshold actually passed is 0.6.
# NOTE(review): the previous comment here said 0.7 - confirm which value is intended.
pick = nms(total_boxes.copy(), 0.6, 'Union')
total_boxes = total_boxes[pick, :]
print(total_boxes.shape)
# Bounding-box regression: move each corner by its offset (columns 5-8),
# scaled by the box width (x offsets) or box height (y offsets).
regw = total_boxes[:, 2] - total_boxes[:, 0]
regh = total_boxes[:, 3] - total_boxes[:, 1]
qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
# One row per box again: the four refined corners plus column 4 carried over
# unchanged (presumably the score).
total_boxes = np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]).T
print(total_boxes.shape)
img = np.array(Image.open(image_path))
# Reshape every bounding box into a square
total_boxes = rerec(total_boxes.copy())
print(total_boxes)
# Truncate the four coordinates toward zero. The values are written back into
# the existing array, so its dtype is unchanged.
total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
print(total_boxes)
# pad() receives the boxes together with the image width and height.
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
# Fresh copy of the picture for the overlay below.
img = np.array(Image.open(image_path))
Image.fromarray(draw_bboxes(img, total_boxes))
# Cut one patch per candidate box and resize it to RNet's 24x24 input.
# Reload the picture first: draw_bboxes() paints its rectangles into `img` in
# place, so cropping from that array would feed the drawn borders to RNet.
img = np.array(Image.open(image_path))
numbox = total_boxes.shape[0]
tempimg = np.zeros((24, 24, 3, numbox))
for k in range(numbox):
    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
    # Start indices coming from pad() are shifted by -1 (presumably 1-based values).
    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
    else:
        print(0)
# Same centring/scaling as the PNet input (0.0078125 == 1/128).
tempimg = (tempimg - 127.5) * 0.0078125
# Move the box axis to the front and swap the two spatial axes.
tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
# Run RNet on the 24x24 patches inside a fresh graph and session.
with tf.Graph().as_default(), tf.Session() as sess:
    with tf.variable_scope('rnet'):
        data = tf.placeholder(tf.float32, shape=(None, 24, 24, 3), name="input")
        rnet = RNet({'data': data})
        rnet.load("./src/align/RNet.npy", sess)
    # Fetch both heads by tensor name: conv5-2 first, then the prob1 softmax.
    out = sess.run(
        ('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'),
        feed_dict={'rnet/input:0': tempimg1},
    )
# Post-process the RNet outputs: out[0] is the conv5-2 head, out[1] the prob1 softmax.
# NOTE(review): indentation was lost in this copy; presumably the two statements after the
# `if` form its body and the print runs unconditionally - confirm against the notebook.
# Coordinates of the detected faces
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
# Row 1 of the transposed softmax output is used as the score
score = out1[1,:]
# `threshold` is only passed to nms() below; the score cut-off is the literal 0.2
threshold = 0.7
ipass = np.where(score>0.2)
# Keep the coordinates of the passing boxes and attach their RNet score as column 4
total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
mv = out0[:,ipass[0]]
if total_boxes.shape[0]>0:
pick = nms(total_boxes, threshold, 'Union')
total_boxes = total_boxes[pick,:]
print(total_boxes)
img = np.array(Image.open(image_path))
from src.align.detect_face import bbreg
# Bounding-box regression with the offsets of the boxes selected by `pick`
total_boxes = bbreg(total_boxes.copy(), mv[:, pick].T)
print(total_boxes)
# Reshape the bounding boxes into squares
total_boxes = rerec(total_boxes.copy())
print(total_boxes)
# Fresh copy of the picture for the overlay below.
img = np.array(Image.open(image_path))
Image.fromarray(draw_bboxes(img, total_boxes))
# Cut one patch per remaining box and resize it to ONet's 48x48 input.
numbox = total_boxes.shape[0]
# Truncate every column toward zero and store the boxes as int32.
total_boxes = np.fix(total_boxes).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
# Reload the picture first: draw_bboxes() paints its rectangles into `img` in
# place, so cropping from that array would feed the drawn borders to ONet.
img = np.array(Image.open(image_path))
tempimg = np.zeros((48, 48, 3, numbox))
for k in range(numbox):
    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
    # Start indices coming from pad() are shifted by -1 (presumably 1-based values).
    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
    else:
        print(0)
# Same centring/scaling as the earlier stages (0.0078125 == 1/128).
tempimg = (tempimg - 127.5) * 0.0078125
# Move the box axis to the front and swap the two spatial axes.
tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
# Run ONet on the 48x48 patches inside a fresh graph and session.
with tf.Graph().as_default():
    with tf.Session() as sess:
        with tf.variable_scope('onet'):
            data = tf.placeholder(tf.float32, shape=(None, 48, 48, 3), name="input")
            onet = ONet({'data': data})
            # Load the weights through the ONet instance built above. The code
            # used to call rnet.load(...), which depended on an object left
            # over from the RNet cell and failed if that cell had not been run.
            onet.load("./src/align/ONet.npy", sess)
        # Fetch the three heads by tensor name: conv6-2, conv6-3, then prob1.
        out = sess.run(
            ('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
            feed_dict={'onet/input:0': tempimg1},
        )
# Bounding-box prediction of ONet
out0 = np.transpose(out[0])
# Facial landmark prediction
out1 = np.transpose(out[1])
# Face confidence
out2 = np.transpose(out[2])
score = out2[1, :]
points = out1
# Keep the candidates whose face score exceeds 0.7
ipass = np.where(score > 0.7)
points = points[:, ipass[0]]
total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
mv = out0[:, ipass[0]]

# The per-box sizes get their own names. Storing them in `w`/`h` overwrote the
# image width/height that the pad(..., w, h) cells rely on, which broke those
# cells when they were run again.
box_w = total_boxes[:, 2] - total_boxes[:, 0] + 1
box_h = total_boxes[:, 3] - total_boxes[:, 1] + 1
# Rows 0-4 are scaled by the box width and shifted by the box x origin,
# rows 5-9 by the box height and the box y origin.
points[0:5, :] = np.tile(box_w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
points[5:10, :] = np.tile(box_h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1

if total_boxes.shape[0] > 0:
    total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
    pick = nms(total_boxes.copy(), 0.7, 'Min')
    total_boxes = total_boxes[pick, :]
    points = points[:, pick]
# Draw the final landmarks and boxes on a fresh copy of the picture.
img = np.array(Image.open(image_path))
# One random colour shared by all landmark dots.
r = random.randint(0, 255)
g = random.randint(0, 255)
b = random.randint(0, 255)
point_color = (r, g, b)
# `points` holds one column per face: rows 0-4 are x values, rows 5-9 the
# matching y values. Index the column explicitly; int(points[i]) only worked
# when exactly one face was left.
for face in range(points.shape[1]):
    for i in range(5):
        cv2.circle(img, (int(points[i, face]), int(points[i + 5, face])), 1, point_color, 4)
Image.fromarray(draw_bboxes(img, total_boxes))
年龄预测
我们使用SSR-Net
模型预测年龄,该模型的论文见此链接。