MediaPipe Hand Landmark Detection
MediaPipe's Hand Landmark Detection can detect both hands in an image. Combined with OpenCV to read frames from a webcam, it can recognize hands in real time and mark the skeleton on the palm and each finger.
Because the code in this article is developed with Jupyter together with TensorFlow, please read "Using Anaconda" and "Using MediaPipe" first and install the corresponding packages. If you prefer not to use Jupyter, you can also refer to "Using a Python Virtual Environment" and build a virtual environment for the examples.
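If your environment is already prepared, a minimal setup usually only needs the mediapipe and opencv-python packages from PyPI (the exact versions depend on your environment), plus the hand_landmarker.task model file downloaded from the MediaPipe documentation and placed at the path the code expects (model/hand_landmarker.task in the examples below):

pip install mediapipe opencv-python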
Detecting hand information
MediaPipe Hands uses several models working together to detect hands and return precise 3D keypoints for the palm and each finger. Besides clearly visible hand shapes and gestures, it can also infer the shape and motion of fingers that are partially occluded; on clear images the reported accuracy for hand detection reaches 95.7%.
After MediaPipe detects a hand, it produces 21 landmarks on the palm and fingers, each with x, y and z coordinates. Because the landmarks include depth, they can be used for many kinds of 3D applications. The figure below shows the index and position of each landmark (image source).
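Each landmark's x and y values are normalized to the 0–1 range relative to the image width and height, and z is a relative depth with the wrist as the reference point, so converting to pixel coordinates only needs a multiplication. The lines below are a minimal sketch (not part of the example programs) assuming a detection_result returned by landmarker.detect() and the frame size w, h; index 8 follows the official numbering for the index-finger tip:

# a minimal sketch, assuming detection_result comes from landmarker.detect()
# and w, h are the frame width and height
INDEX_FINGER_TIP = 8                       # landmark number 8 is the index-finger tip
if detection_result.hand_landmarks:
    lm = detection_result.hand_landmarks[0][INDEX_FINGER_TIP]
    px, py = int(lm.x * w), int(lm.y * h)  # convert normalized x/y to pixel coordinates
    print(px, py, lm.z)                    # z is a relative depth value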
The code below extends the example from the "Read and Play Video" article. It combines the MediaPipe hand landmark detector with frames captured from a webcam and prints the detected hand information in real time.
import cv2
import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# hand landmarker settings
options = HandLandmarkerOptions(
    num_hands=2,                 # detect up to two hands
    base_options=BaseOptions(model_asset_path='model/hand_landmarker.task'),
    running_mode=VisionRunningMode.IMAGE)

with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open camera")
        exit()
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Cannot receive frame")
            break
        w = frame.shape[1]       # frame width
        h = frame.shape[0]       # frame height
        # OpenCV frames are BGR; convert to RGB before wrapping them as a MediaPipe image
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        hand_landmarker_result = landmarker.detect(mp_image)
        print(hand_landmarker_result)
        cv2.imshow('oxxostudio', frame)
        if cv2.waitKey(5) == ord('q'):
            break                # press q to stop
    cap.release()
    cv2.destroyAllWindows()
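The print(hand_landmarker_result) line dumps the whole result object, which contains handedness (the left/right classification) and hand_landmarks (the 21 landmarks of each detected hand). As a rough sketch of how the result could be unpacked inside the loop (the names hand_landmarker_result, w and h are taken from the code above), you could print the label and wrist position of each detected hand:

for i, hand in enumerate(hand_landmarker_result.hand_landmarks):
    label = hand_landmarker_result.handedness[i][0].category_name   # 'Left' or 'Right'
    wrist = hand[0]                                                  # landmark 0 is the wrist
    print(label, int(wrist.x * w), int(wrist.y * h))                 # wrist position in pixels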
Drawing the hand skeleton in real time
Once hands can be detected, you can follow the "Hand Landmarks Detection with MediaPipe Tasks" example code and add a drawing function, so the hand landmarks are drawn on the frame in real time whenever a hand is detected.
import cv2
import numpy as np
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# hand landmarker settings
options = HandLandmarkerOptions(
    num_hands=2,
    base_options=BaseOptions(model_asset_path='model/hand_landmarker.task'),
    running_mode=VisionRunningMode.IMAGE)

# text annotation settings
MARGIN = 10          # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green

# draw the hand skeleton
def draw_landmarks_on_image(rgb_image, detection_result):
    hand_landmarks_list = detection_result.hand_landmarks
    handedness_list = detection_result.handedness
    annotated_image = np.copy(rgb_image)

    # Loop through the detected hands to visualize.
    for idx in range(len(hand_landmarks_list)):
        hand_landmarks = hand_landmarks_list[idx]
        handedness = handedness_list[idx]

        # Draw the hand landmarks.
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style())

        # Get the top left corner of the detected hand's bounding box.
        height, width, _ = annotated_image.shape
        x_coordinates = [landmark.x for landmark in hand_landmarks]
        y_coordinates = [landmark.y for landmark in hand_landmarks]
        text_x = int(min(x_coordinates) * width)
        text_y = int(min(y_coordinates) * height) - MARGIN

        # Draw handedness (left or right hand) on the image.
        cv2.putText(annotated_image, f"{handedness[0].category_name}",
                    (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                    FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

    return annotated_image

with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open camera")
        exit()
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Cannot receive frame")
            break
        w = frame.shape[1]   # frame width
        h = frame.shape[0]   # frame height
        # convert the BGR frame to RGB before handing it to MediaPipe
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        hand_landmarker_result = landmarker.detect(mp_image)
        print(hand_landmarker_result.handedness)
        annotated_image = draw_landmarks_on_image(frame, hand_landmarker_result)  # draw on the original frame
        cv2.imshow('oxxostudio', annotated_image)
        if cv2.waitKey(5) == ord('q'):
            break            # press q to stop
    cap.release()
    cv2.destroyAllWindows()
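Both examples run the detector with running_mode=VisionRunningMode.IMAGE, which treats every frame as an independent still image. MediaPipe Tasks also provides a VIDEO running mode, where the detector receives a monotonically increasing timestamp for each frame; the block below is a minimal sketch of that variant, not part of the original examples, and the fixed 33 ms timestamp step is just one possible choice for an approximately 30 fps camera:

import cv2
import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

options = HandLandmarkerOptions(
    num_hands=2,
    base_options=BaseOptions(model_asset_path='model/hand_landmarker.task'),
    running_mode=VisionRunningMode.VIDEO)          # VIDEO mode instead of IMAGE

with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(0)
    timestamp_ms = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        # VIDEO mode requires a monotonically increasing timestamp in milliseconds
        timestamp_ms += 33                         # assumed ~30 fps; any increasing value works
        result = landmarker.detect_for_video(mp_image, timestamp_ms)
        print(result.handedness)
        cv2.imshow('oxxostudio', frame)
        if cv2.waitKey(5) == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()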