import base64
import html
import io
import time
from IPython.display import display, Javascript
from google.colab.output import eval_js
import numpy as np
from PIL import Image
import cv2
# Inject the JavaScript that renders the live webcam stream in the cell output
# and exposes a takePhoto() function we can call from Python via eval_js.
def start_input():
    js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    // Stop the camera and remove every element we added to the page.
    function removeDom() {
      stream.getVideoTracks()[0].stop();
      video.remove();
      div.remove();
      video = null;
      div = null;
      stream = null;
      imgElement = null;
      captureCanvas = null;
      labelElement = null;
    }

    // Copy the current video frame onto the capture canvas and resolve the
    // pending takePhoto() promise with it as a JPEG data URL.
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 512, 512);
          result = captureCanvas.toDataURL('image/jpeg', 0.8);
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    // Build the video element, the overlay image and the status label once.
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 512;  //video.videoWidth;
      captureCanvas.height = 512; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }

    // Called from Python: update the status label and the overlay image, then
    // wait for the next captured frame (or tear everything down if the user
    // clicked to stop).
    async function takePhoto(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

    display(js)
# Request the next frame from the browser. Returns a dict with timing info and
# the JPEG frame as a data URL, or '' once the user clicks to stop the stream.
def take_photo(label, img_data):
    data = eval_js('takePhoto("{}", "{}")'.format(label, img_data))
    return data
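# A minimal, capture-only sketch of how the two helpers above are used together
# (grab_single_frame is a hypothetical name, not part of the original code):
# inject the JavaScript once, grab one frame, and decode the JPEG data URL.
def grab_single_frame():
    start_input()
    reply = take_photo('Capturing...', '')   # '' means: no overlay image yet
    if not reply:                             # '' is returned once the user clicks stop
        return None
    jpeg_bytes = base64.b64decode(reply['img'].split(',')[1])
    frame = np.array(Image.open(io.BytesIO(jpeg_bytes)))
    print('frame shape:', frame.shape,
          'timings (ms):', reply['create'], reply['show'], reply['capture'])
    return frame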
###### Helper functions: decode webcam frames and draw YOLO detections
from PIL import Image
import numpy as np
import base64
import io
import torch  # used by get_drawing_array below
def js_reply_to_image(js_reply):
    """
    input:
      js_reply: dict returned by take_photo(), containing the webcam frame as a
        JPEG data URL under the 'img' key
    output:
      image_array: RGB image array of size 512 x 512 from the webcam
    """
    jpeg_bytes = base64.b64decode(js_reply['img'].split(',')[1])
    image_PIL = Image.open(io.BytesIO(jpeg_bytes))
    image_array = np.array(image_PIL)
    return image_array
def get_drawing_array(image_array):
    """
    input:
      image_array: RGB image array of size 512 x 512 from the webcam
    output:
      drawing_array: RGBA image of size 512 x 512 that contains only the
        bounding boxes and labels; channel A is 255 where a pixel was drawn on
        (lines, text) and 0 everywhere else
    """
    # letterbox, non_max_suppression, scale_coords, plot_one_box, model, opt,
    # device, names and colors come from the YOLO utilities and model loaded
    # earlier in the notebook.
    drawing_array = np.zeros([512, 512, 4], dtype=np.uint8)
    img = letterbox(image_array, new_shape=opt.img_size)[0]
    img = img.transpose(2, 0, 1)  # HWC to CHW
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp16/32
    img /= 255.0       # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img)[0]
    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                               classes=opt.classes, agnostic=opt.agnostic_nms)

    # Process detections: rescale boxes to the original frame and draw them
    det = pred[0]
    if det is not None and len(det):
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], image_array.shape).round()
        for *xyxy, conf, cls in det:
            label = '%s %.2f' % (names[int(cls)], conf)
            plot_one_box(xyxy, drawing_array, label=label, color=colors[int(cls)])

    # Drawn pixels become opaque; everything else stays fully transparent
    drawing_array[:, :, 3] = (drawing_array.max(axis=2) > 0).astype(int) * 255
    return drawing_array
def drawing_array_to_bytes(drawing_array):
    """
    input:
      drawing_array: RGBA image of size 512 x 512 with the bounding boxes and
        labels from the YOLO prediction; channel A is 255 where a pixel was
        drawn on (lines, text) and 0 everywhere else
    output:
      drawing_bytes: string, the drawing encoded as a base64 PNG data URL
    """
    drawing_PIL = Image.fromarray(drawing_array, 'RGBA')
    iobuf = io.BytesIO()
    drawing_PIL.save(iobuf, format='png')
    drawing_bytes = 'data:image/png;base64,{}'.format(
        str(base64.b64encode(iobuf.getvalue()), 'utf-8'))
    return drawing_bytes
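# The three helpers above are meant to chain together: decode the frame, run
# YOLO and draw the boxes into a transparent RGBA image, encode that as a PNG
# data URL, and hand it back to take_photo() so the browser overlays it on the
# live video. A sketch of that loop, wrapped in a hypothetical function so it
# does not run by itself; it assumes the YOLO model and utilities used by
# get_drawing_array (model, opt, names, colors, ...) are already loaded.
def run_yolo_overlay_demo():
    start_input()
    label_html = 'Capturing...'
    img_data = ''
    while True:
        js_reply = take_photo(label_html, img_data)
        if not js_reply:                                    # user clicked to stop
            break
        image_array = js_reply_to_image(js_reply)           # 512 x 512 RGB frame
        drawing_array = get_drawing_array(image_array)      # RGBA boxes + labels
        img_data = drawing_array_to_bytes(drawing_array)    # next frame's overlay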
###### Capture webcam frames, run the detector, and display the results
import tensorflow as tf
from google.colab.patches import cv2_imshow  # cv2.imshow replacement for Colab

# detect_fn, category_index and visualise_on_image are assumed to come from the
# TensorFlow object-detection model and label map loaded earlier in the notebook.
start_input()
label_html = 'Capturing...'
img_data = ''
count = 0
start_time = time.time()
while True:
    js_reply = take_photo(label_html, img_data)
    if not js_reply:  # empty reply means the user clicked to stop the demo
        break

    # Decode the JPEG data URL into an RGB array and mirror it horizontally
    image_array = js_reply_to_image(js_reply)
    frame = cv2.flip(image_array, 1)
    image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # The input needs to be a tensor; convert it using `tf.convert_to_tensor`.
    # The model expects a batch of images, so also add an axis with `tf.newaxis`.
    input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]

    # Pass the frame through the detector
    detections = detect_fn(input_tensor)

    # Detection parameters
    score_thresh = 0.4   # minimum score for a detection to be kept
    max_detections = 1

    # All outputs are batch tensors: convert them to numpy arrays and take
    # index [0] to remove the batch dimension. Only the first max_detections
    # detections are kept.
    scores = detections['detection_scores'][0, :max_detections].numpy()
    bboxes = detections['detection_boxes'][0, :max_detections].numpy()
    labels = detections['detection_classes'][0, :max_detections].numpy().astype(np.int64)
    labels = [category_index[n]['name'] for n in labels]

    # Draw the detections on the frame and display it below the cell
    visualise_on_image(frame, bboxes, labels, scores, score_thresh)

    end_time = time.time()
    fps = int(1 / (end_time - start_time))
    start_time = end_time
    cv2_imshow(frame)
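# The loop computes fps but never displays it. A hypothetical addition (placed
# inside the loop, just before cv2_imshow(frame)) that draws it on the frame
# with OpenCV:
#
#     cv2.putText(frame, 'FPS: {}'.format(fps), (10, 30),
#                 cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)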