2 years ago
12 kB
import base64
import html
import io
import json
import time

from IPython.display import display, Javascript
from google.colab.output import eval_js
import numpy as np
from PIL import Image
import cv2


def start_input():
    """Inject the webcam-capture JavaScript into the notebook output frame.

    The script defines a global ``takePhoto(label, imgData)`` function that
    ``take_photo`` later calls through ``eval_js``. Nothing is shown and the
    camera is not opened until the first ``takePhoto`` call, which builds the
    DOM lazily via ``createDom``.
    """
    # Top-level state is declared with `var` on purpose: the script may be
    # displayed more than once in the same output frame, and `let`/`const`
    # would throw on redeclaration.
    js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    // Stop the camera track and drop every element createDom() built.
    function removeDom() {
      stream.getVideoTracks()[0].stop();
      video.remove();
      div.remove();
      video = null;
      div = null;
      stream = null;
      imgElement = null;
      captureCanvas = null;
      labelElement = null;
    }

    // Runs once per animation frame; fulfils a pending takePhoto() request
    // with a JPEG data URL, or with "" when the user asked to stop.
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 512, 512);
          result = captureCanvas.toDataURL('image/jpeg', 0.8);
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    // Build the preview UI and open the camera; a no-op if already built.
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      // Overlay image drawn on top of the video (positioned in takePhoto).
      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 512; //video.videoWidth;
      captureCanvas.height = 512; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }

    // Update the status label / overlay, then wait for the next frame.
    // Returns '' once the user has asked to stop, otherwise a timing + image
    // record.
    async function takePhoto(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });

      // onAnimationFrame resolves with "" only when the user clicked to stop
      // while this capture was pending. Tear down here and report shutdown,
      // instead of handing the caller a record with an empty image.
      if (result === "") {
        removeDom();
        shutdown = false;
        return '';
      }

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

    display(js)


def take_photo(label, img_data):
    """Ask the browser for one webcam frame.

    input:
      label: text/HTML written into the status label ('' leaves it unchanged)
      img_data: data URL drawn over the video ('' leaves the overlay unchanged)

    output:
      data: dict with keys 'create', 'show', 'capture' (millisecond timings)
            and 'img' (JPEG data URL), or '' once the user has stopped the demo
    """
    # json.dumps yields a correctly escaped JS string literal, so quotes,
    # backslashes or newlines in the arguments cannot break (or inject into)
    # the evaluated expression. str() keeps non-string arguments working.
    call = 'takePhoto({}, {})'.format(json.dumps(str(label)),
                                      json.dumps(str(img_data)))
    data = eval_js(call)
    return data


#############


def js_reply_to_image(js_reply):
    """
    input:
      js_reply: JavaScript object, contain image from webcam

    output:
      image_array: image array RGB size 512 x 512 from webcam
    """
    # js_reply['img'] is a data URL; the base64 payload follows the comma.
    jpeg_bytes = base64.b64decode(js_reply['img'].split(',')[1])
    image_PIL = Image.open(io.BytesIO(jpeg_bytes))
    image_array = np.array(image_PIL)

    return image_array


def get_drawing_array(image_array):
    """
    input:
      image_array: image array RGB size 512 x 512 from webcam

    output:
      drawing_array: image RGBA size 512 x 512 only contain bounding box and text,
                     channel A value = 255 if the pixel contains drawing properties (lines, text)
                     else channel A value = 0

    NOTE(review): relies on names that are not defined in this part of the file
    (torch, letterbox, opt, device, model, non_max_suppression, scale_coords,
    names, colors, plot_one_box) - presumably set up by an earlier notebook cell.
    """
    drawing_array = np.zeros([512, 512, 4], dtype=np.uint8)

    img = letterbox(image_array, new_shape=opt.img_size)[0]
    img = img.transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp16/32
    img /= 255.0  # (0 - 255) to (0.0 - 1.0)
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img)[0]

    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                               classes=opt.classes, agnostic=opt.agnostic_nms)

    # Process detections
    det = pred[0]
    if det is not None and len(det):
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], image_array.shape).round()

        # Write results
        for *xyxy, conf, cls in det:
            label = '%s %.2f' % (names[int(cls)], conf)
            plot_one_box(xyxy, drawing_array, label=label, color=colors[int(cls)])

    # Mark every pixel that received any drawing as opaque.
    drawing_array[:, :, 3] = (drawing_array.max(axis=2) > 0).astype(int) * 255

    return drawing_array


def drawing_array_to_bytes(drawing_array):
    """
    input:
      drawing_array: image RGBA size 512 x 512
                     contain bounding box and text from yolo prediction,
                     channel A value = 255 if the pixel contains drawing properties (lines, text)
                     else channel A value = 0

    output:
      drawing_bytes: string, encoded from drawing_array
    """
    drawing_PIL = Image.fromarray(drawing_array, 'RGBA')
    iobuf = io.BytesIO()
    drawing_PIL.save(iobuf, format='png')
    drawing_bytes = 'data:image/png;base64,{}'.format(
        str(base64.b64encode(iobuf.getvalue()), 'utf-8'))
    return drawing_bytes


###### Record and print pictures
start_input()
label_html = 'Capturing...'
img_data = ''
count = 0
start_time = time.time()

while True:
    js_reply = take_photo(label_html, img_data)
    if not js_reply:
        # '' means the user clicked to stop and the preview was torn down.
        break

    image_array = js_reply_to_image(js_reply)

    frame = cv2.flip(image_array, 1)
    # NOTE(review): image_array is decoded by PIL, so it is presumably already
    # RGB and this conversion effectively swaps it to BGR - confirm the channel
    # order expected by detect_fn (and by cv2_imshow below).
    image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    # The model expects a batch of images, so also add an axis with `tf.newaxis`.
    input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]

    # Pass frame through detector
    detections = detect_fn(input_tensor)

    # Set detection parameters
    score_thresh = 0.4   # Minimum threshold for object detection
    max_detections = 1

    # All outputs are batches tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    scores = detections['detection_scores'][0, :max_detections].numpy()
    bboxes = detections['detection_boxes'][0, :max_detections].numpy()
    labels = detections['detection_classes'][0, :max_detections].numpy().astype(np.int64)
    labels = [category_index[n]['name'] for n in labels]

    # Display detections
    visualise_on_image(frame, bboxes, labels, scores, score_thresh)

    end_time = time.time()
    elapsed = end_time - start_time
    # Guard against a zero interval on coarse clocks.
    fps = int(1 / elapsed) if elapsed > 0 else 0
    start_time = end_time
    cv2_imshow(frame)
Editor is loading...