Golang : Handling image beyond OpenCV video capture boundary
One problem that I was unable to solve in the previous tutorial on how to put UTF-8 characters on OpenCV image frames is the crash caused by OpenCV's SetROI and Copy functions whenever the UTF-8 image breaches the camera's dimensions (its visual range).
To solve this problem, the rectangle of the image (let's call it the label) has to be resized dynamically so that it never crosses the boundary. The code example below contains two solutions.
The first solution is to resize the label dynamically by calculating, on each iteration, the label size that still fits inside the frame, so that the program does not crash. This solution is applied when the label extends beyond the camera's width or height.
The second solution is simply not to display the label whenever it crosses the boundary. This second solution is applied when X or Y is less than 0, that is, when the label's position lies beyond the left or top edge of the frame.
Use the method that you find easiest to implement.
To understand how these solutions work, run the code below, test it out by adjusting the sliders.
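The PNG image used by the code below can be downloaded at https://d1ohg4ss876yi2.cloudfront.net/golang-convert-png-transparent-background-image-to-jpg-or-jpeg-image/PNG-file.png. Note that the code opens ./JPEG-file.jpg and decodes it with image/jpeg, so convert the image to JPEG (or adjust the file name and decoder) before running.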
Here you go!
package main
import (
"fmt"
"image"
"image/color"
"image/draw"
"image/jpeg"
"math"
"os"
"runtime"
"strconv"
"time"
"github.com/lazywei/go-opencv/opencv"
"github.com/mattn/go-gtk/glib"
"github.com/mattn/go-gtk/gtk"
)
// Package-level state shared between main (GTK callbacks) and the
// processFrameAndUpdate goroutine.
var (
// placeholders; main replaces win, webCamera and statusbar with the real
// objects returned by opencv.NewWindow, opencv.NewCameraCapture and
// gtk.NewStatusbar
win = new(opencv.Window)
webCamera = new(opencv.Capture)
statusbar = new(gtk.Statusbar)
// not referenced elsewhere in the visible source
snapshotFileName string
// frame size reported by the camera, filled in by main
cameraWidth, cameraHeight int
// top-left corner of the label; updated by the two GTK sliders
sliderPosX int = 100
sliderPosY int = 100
// how far the label sticks out past cameraWidth / cameraHeight
penetrationWidth int // to handle beyond cameraWidth
penetrationHeight int
// distance from the slider position to the camera's right / bottom edge
distanceWidth int
distanceHeight int
// set to true by the Quit button; processFrameAndUpdate skips all camera
// work while it is true
stopCamera = false // to prevent segmentation fault
backgroundWidth = 267 // from PNG file dimension, change to your own image file dimension
backgroundHeight = 394
// current (possibly shrunk) size of the label; processFrameAndUpdate
// modifies these on every frame, so their values carry over between frames
elasticWidth = backgroundWidth
elasticHeight = backgroundHeight
// parameters for the font used to draw the timestamp
horizontalScale = float32(1.0)
verticalScale = float32(1.0)
shear = float32(1.0)
thickness = 3
lineType = 8
textFont = opencv.InitFont(opencv.CV_FONT_HERSHEY_SIMPLEX, horizontalScale, verticalScale, shear, thickness, lineType)
// IplImgFrame is the current camera frame, utf8TextImg the label converted
// to an OpenCV image
IplImgFrame, utf8TextImg *opencv.IplImage
// OpenCV scalars are given in (blue, green, red, alpha) order
redColor = opencv.NewScalar(0, 0, 255, 0) // red - (blue, green, red, alpha)
cyanColor = opencv.NewScalar(255, 255, 0, 0) // cyan - (blue, green, red, alpha)
// image/color values in (red, green, blue, alpha) order; not referenced
// elsewhere in the visible source
red = color.RGBA{255, 0, 0, 255}
blue = color.RGBA{0, 0, 255, 255}
white = color.RGBA{255, 255, 255, 255}
black = color.RGBA{0, 0, 0, 255}
// the label image; created and filled by main
background *image.RGBA
// more color at https://github.com/golang/image/blob/master/colornames/table.go
)
// opencvImageBGRToBGRA returns a 4-channel (BGRA) version of the 3-channel
// BGR image img, with the same width and height.
//
// The 4-channel form is what lets the label be copied into a region of
// interest of the frame. Going through ToImage() would also work but slows
// down the refresh rate, so CvtColor() is used instead.
func opencvImageBGRToBGRA(img *opencv.IplImage) opencv.IplImage {
	const bgraChannels = 4

	// destination image: same size as the source, 8 bits per channel
	dst := opencv.CreateImage(img.Width(), img.Height(), opencv.IPL_DEPTH_8U, bgraChannels)

	// upgrade BGR to BGRA (3 to 4 channels)
	opencv.CvtColor(img, dst, opencv.CV_BGR2BGRA)

	return *dst
}
// BGRAToBGR returns a 3-channel (BGR) version of the 4-channel BGRA image
// img, with the same width and height.
func BGRAToBGR(img *opencv.IplImage) opencv.IplImage {
	const (
		bgrChannels = 3
		// conversion code passed to CvtColor for BGRA -> BGR; the raw
		// integer is used here, see
		// http://docs.opencv.org/3.1.0/df/d4e/group__imgproc__c.html
		bgraToBGRCode = 1
	)

	// destination image: same size as the source, 8 bits per channel
	dst := opencv.CreateImage(img.Width(), img.Height(), opencv.IPL_DEPTH_8U, bgrChannels)

	// downgrade BGRA to BGR (4 to 3 channels)
	opencv.CvtColor(img, dst, bgraToBGRCode)

	return *dst
}
// processFrameAndUpdate is the camera loop, started as a goroutine by main.
//
// It converts the label (background) to an OpenCV image once, then loops
// forever. On each iteration, unless stopCamera is set, it grabs a frame,
// converts it to BGRA, works out how much of the label fits inside the
// camera frame at (sliderPosX, sliderPosY), copies that part of the label
// into the frame, draws an outline and a timestamp, and shows the frame in
// win.
//
// The function never returns. It reads and writes the package-level
// elasticWidth / elasticHeight, so the label size computed for one frame is
// the starting point for the next frame.
//
// NOTE(review): when stopCamera is true the loop keeps spinning without
// sleeping or exiting.
func processFrameAndUpdate() {
// convert background from image.Image type to opencv.IplImage
utf8TextImg = opencv.FromImage(background)
// starts out as the full label; re-cropped below as the label shrinks
var utf8TextImgCropped = utf8TextImg
for {
if !stopCamera {
if webCamera.GrabFrame() {
IplImgFrame = webCamera.RetrieveFrame(1)
if IplImgFrame != nil {
// replace the retrieved frame's contents with its 4-channel version
// NOTE(review): the helper allocates a new image on every frame and no
// release is visible here - possible leak, confirm.
*IplImgFrame = opencvImageBGRToBGRA(IplImgFrame)
// NOTE(review): "+0800" is not a Go time layout element (the numeric zone
// element is "-0700"), so it is presumably printed literally whatever the
// local zone is - confirm.
currentTime := time.Now().Local().Format("2006-01-02 15:04:05 +0800")
// set ROI(Region Of Interest) in IplImageFrame
// and paste our UTF8 runes into ROI via Copy
// need extra care here to check if our rectangle is beyond the boundary
// debug output: the four corners of the label as "x,y" strings
rectTopLeftPosition := strconv.Itoa(sliderPosX) + "," + strconv.Itoa(sliderPosY)
rectTopRightPosition := strconv.Itoa(sliderPosX+elasticWidth) + "," + strconv.Itoa(sliderPosY)
rectBottomLeftPosition := strconv.Itoa(sliderPosX) + "," + strconv.Itoa(sliderPosY+elasticHeight)
rectBottomRightPosition := strconv.Itoa(sliderPosX+elasticWidth) + "," + strconv.Itoa(sliderPosY+elasticHeight)
fmt.Println("Top left : ", rectTopLeftPosition)
fmt.Println("Top right : ", rectTopRightPosition)
fmt.Println("Bottom left : ", rectBottomLeftPosition)
fmt.Println("Bottom right : ", rectBottomRightPosition)
//rect := opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
// Initial rectangle. rect is reassigned right before both SetROI calls
// further down, so this value and the intermediate rect assignments below
// only show up in the debug print.
rect := opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, backgroundHeight)
fmt.Println("Rect beginning : ", rect)
// ---------------------------------------------------------------
// handle the X-axis and Y-axis crossing the camera width and height
// The condition is an OR: if only one axis overflows, the penetration
// computed for the other axis is negative and subtracting it enlarges that
// dimension (capped further down only inside the next if).
if ((sliderPosX + elasticWidth) > cameraWidth) || ((sliderPosY + elasticHeight) > cameraHeight) {
// calculate the penetration distance of utf8TextImg beyond the camera Width
penetrationWidth = (sliderPosX + elasticWidth) - cameraWidth
elasticWidth = elasticWidth - penetrationWidth
// calculate the penetration distance of utf8TextImg beyond the camera Height
penetrationHeight = (sliderPosY + elasticHeight) - cameraHeight
elasticHeight = elasticHeight - penetrationHeight
fmt.Println("ElasticHeight : ", elasticHeight)
fmt.Println("BackgroundHeight : ", backgroundHeight)
// need to crop utf8TextImg as well, otherwise, the Copy() function below
// will crash the program
// NOTE(review): with || this branch also runs when one of the two
// dimensions is zero or negative, and that value is then passed to NewRect
// and Crop - && may have been intended, confirm.
if (elasticWidth > 0) || (elasticHeight > 0) {
// cap elasticWidth to backgroundWidth
if elasticWidth > backgroundWidth {
elasticWidth = backgroundWidth
}
// cap elasticHeight to backgroundHeight
if elasticHeight > backgroundHeight {
elasticHeight = backgroundHeight
}
rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, elasticHeight)
}
// label squeezed to zero width: fall back to a 1-pixel-wide strip
if elasticWidth == 0 {
rect = opencv.NewRect(sliderPosX-1, sliderPosY, 1, backgroundHeight)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, 1, backgroundHeight)
}
// label squeezed to zero height: fall back to a 1-pixel-high strip
if elasticHeight == 0 {
rect = opencv.NewRect(sliderPosX, sliderPosY-1, backgroundWidth, 1)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, backgroundWidth, 1)
}
}
// Grow the width back: the label's right edge is inside the frame again
// but the label is still narrower than its full width.
if ((sliderPosX + elasticWidth) < cameraWidth) && (elasticWidth < backgroundWidth) {
// calculate the distance of sliderPosX to camera Width
distanceWidth = sliderPosX - cameraWidth
if distanceWidth <= 0 {
distanceWidth = int(math.Abs(float64(distanceWidth)))
}
// cap to maximum size of backgroundWidth
if distanceWidth > backgroundWidth {
distanceWidth = backgroundWidth
}
// add the absolute difference between the available distance and the
// current width to the current width
compensate := math.Abs(float64(distanceWidth - elasticWidth))
//fmt.Println("Distance from camera width : ", distanceWidth)
//fmt.Println("Compensate back : ", compensate)
//fmt.Println("Elastic width plus compensate : ", elasticWidth+int(compensate))
elasticWidth = elasticWidth + int(compensate)
if elasticWidth > 0 {
rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, backgroundHeight)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, backgroundHeight)
}
if elasticWidth == 0 {
rect = opencv.NewRect(sliderPosX-1, sliderPosY, 1, backgroundHeight)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, 1, backgroundHeight)
}
}
// Grow the height back: same procedure as for the width, on the Y axis.
if ((sliderPosY + elasticHeight) < cameraHeight) && (elasticHeight < backgroundHeight) {
// calculate the distance of sliderPosY to camera Height
distanceHeight = sliderPosY - cameraHeight
if distanceHeight <= 0 {
distanceHeight = int(math.Abs(float64(distanceHeight)))
}
// cap to maximum size of backgroundHeight
if distanceHeight > backgroundHeight {
distanceHeight = backgroundHeight
}
compensate := math.Abs(float64(distanceHeight - elasticHeight))
//fmt.Println("Distance from camera height : ", distanceHeight)
//fmt.Println("Compensate back : ", compensate)
//fmt.Println("Elastic height plus compensate : ", elasticHeight+int(compensate))
elasticHeight = elasticHeight + int(compensate)
if elasticHeight > 0 {
rect = opencv.NewRect(sliderPosX, sliderPosY, backgroundWidth, elasticHeight)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, backgroundWidth, elasticHeight)
}
if elasticHeight == 0 {
rect = opencv.NewRect(sliderPosX, sliderPosY-1, backgroundWidth, 1)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, backgroundWidth, 1)
}
}
// The label is only pasted when both dimensions are positive.
if (elasticWidth > 0) && (elasticHeight > 0) {
if ((sliderPosX + backgroundWidth) > cameraWidth) && ((sliderPosY + backgroundHeight) > cameraHeight) {
// we are at bottom right corner
fmt.Println("TODO : Handle X and Y beyond boundary together")
// calculate the distance of sliderPosY to camera Height
distanceHeight = sliderPosY - cameraHeight
if distanceHeight <= 0 {
distanceHeight = int(math.Abs(float64(distanceHeight)))
}
// cap to maximum size of backgroundHeight
if distanceHeight > backgroundHeight {
distanceHeight = backgroundHeight
}
compensate := math.Abs(float64(distanceHeight - elasticHeight))
elasticHeight = elasticHeight + int(compensate)
// calculate the distance of sliderPosX to camera Width
distanceWidth = sliderPosX - cameraWidth
if distanceWidth <= 0 {
distanceWidth = int(math.Abs(float64(distanceWidth)))
}
// cap to maximum size of backgroundWidth
if distanceWidth > backgroundWidth {
distanceWidth = backgroundWidth
}
compensate = math.Abs(float64(distanceWidth - elasticWidth))
elasticWidth = elasticWidth + int(compensate)
// final rectangle and crop for this frame, then paste the label
rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, elasticHeight)
IplImgFrame.SetROI(rect)
opencv.Copy(utf8TextImgCropped, IplImgFrame, nil)
IplImgFrame.ResetROI() // don't forget this!
// outline of the label
// NOTE(review): the outline uses backgroundHeight, not elasticHeight, so it
// can be taller than the pasted label - confirm this is intended.
opencv.Rectangle(IplImgFrame,
opencv.Point{sliderPosX + elasticWidth, sliderPosY},
opencv.Point{sliderPosX, sliderPosY + backgroundHeight},
opencv.ScalarAll(0.0), 2, 2, 0)
} else {
fmt.Println("Elastic width : ", elasticWidth)
fmt.Println("Elastic height : ", elasticHeight)
rect = opencv.NewRect(sliderPosX, sliderPosY, elasticWidth, elasticHeight)
// the easiest solution is ... not to show if the rectangle is beyond
// the camera boundary or visual range :P
// such as entering the negative zones. X and Y less than 0
if !((sliderPosX < 0) || (sliderPosY < 0)) {
IplImgFrame.SetROI(rect)
// these two prints show the size of the previous crop; the crop that is
// actually pasted is made on the line after them
fmt.Println(utf8TextImgCropped.Width())
fmt.Println(utf8TextImgCropped.Height())
utf8TextImgCropped = opencv.Crop(utf8TextImg, 0, 0, elasticWidth, elasticHeight)
opencv.Copy(utf8TextImgCropped, IplImgFrame, nil)
IplImgFrame.ResetROI() // don't forget this!
}
// the outline is drawn even when the label itself was skipped above
opencv.Rectangle(IplImgFrame,
opencv.Point{sliderPosX + elasticWidth, sliderPosY},
opencv.Point{sliderPosX, sliderPosY + backgroundHeight},
opencv.ScalarAll(0.0), 2, 2, 0)
}
}
// timestamp and display happen for every retrieved frame, whether or not
// the label was pasted
textFont.PutText(IplImgFrame, currentTime, opencv.Point{sliderPosX, sliderPosY + int(verticalScale*200.0)}, cyanColor)
win.ShowImage(IplImgFrame)
}
}
}
}
}
// main wires the demo together: it loads the label image from
// ./JPEG-file.jpg, opens the default camera, starts processFrameAndUpdate in
// a goroutine, and then runs a small GTK window whose two sliders move the
// label (sliderPosX / sliderPosY).
//
// The camera and the OpenCV window are released by deferred calls when main
// returns, so every path that ends the GTK main loop must first set
// stopCamera to keep the frame goroutine away from them.
func main() {
	cores := runtime.NumCPU()
	fmt.Printf("This machine has %d CPU cores. Using all cores. \n", cores)
	// maximize CPU usage for maximum performance
	runtime.GOMAXPROCS(cores)

	jpegImageFile, err := os.Open("./JPEG-file.jpg")
	if err != nil {
		// report the real cause; the failure is not always "file not found"
		fmt.Println("Unable to open JPEG-file.jpg :", err)
		os.Exit(1)
	}
	defer jpegImageFile.Close()

	// create image from JPEG file
	imgSource, err := jpeg.Decode(jpegImageFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// create the label canvas; for this example the width and height are
	// hard-coded (backgroundWidth x backgroundHeight), change them to suit
	// your image file
	background = image.NewRGBA(image.Rect(0, 0, backgroundWidth, backgroundHeight))

	// paste the decoded image over the label canvas
	draw.Draw(background, background.Bounds(), imgSource, imgSource.Bounds().Min, draw.Over)

	// a new OpenCV window
	win = opencv.NewWindow("Handle image beyond OpenCV video capture boundary")
	defer win.Destroy()

	// activate webCamera
	webCamera = opencv.NewCameraCapture(opencv.CV_CAP_ANY) // autodetect
	if webCamera == nil {
		panic("Unable to open camera")
	}
	defer webCamera.Release()

	// get some data from camera
	cameraWidth = int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_WIDTH))
	cameraHeight = int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_HEIGHT))
	fmt.Println("Camera width : ", cameraWidth)
	fmt.Println("Camera height : ", cameraHeight)

	// open up a new "pure" OpenCV window first
	go processFrameAndUpdate() // goroutine to update feed from camera

	// then our "floating" GTK toolbar
	gtk.Init(nil)
	window := gtk.NewWindow(gtk.WINDOW_TOPLEVEL)
	window.SetPosition(gtk.WIN_POS_CENTER)
	window.SetTitle("Example of writing UTF8 text on Go-OpenCV video capture!")
	window.SetIconName("gtk-dialog-info")
	window.Connect("destroy", func(ctx *glib.CallbackContext) {
		// Stop the frame loop before the deferred Release/Destroy run, exactly
		// as the Quit button does; otherwise closing the window leaves the
		// goroutine using a released camera.
		stopCamera = true
		println("got destroy!", ctx.Data().(string))
		gtk.MainQuit()
	}, "Happy coding!")

	vbox := gtk.NewVBox(false, 1)

	//--------------------------------------------------------
	// GtkVPaned
	//--------------------------------------------------------
	vpaned := gtk.NewVPaned()
	vbox.Add(vpaned)

	//--------------------------------------------------------
	// GtkFrame
	//--------------------------------------------------------
	frame1 := gtk.NewFrame("Adjust X & Y co-ordinates to place the text location :")
	framebox1 := gtk.NewVBox(false, 1)
	frame1.Add(framebox1)

	// showPosition writes the current slider position to the status bar.
	// statusbar is read when the callback fires, i.e. after it has been
	// replaced by gtk.NewStatusbar() below.
	showPosition := func() {
		statusbar.Push(statusbar.GetContextId("go-gtk"), "X : "+strconv.Itoa(sliderPosX)+" Y : "+strconv.Itoa(sliderPosY))
	}

	//--------------------------------------------------------
	// GtkScale
	//--------------------------------------------------------
	// both sliders start at -100 so the label can be pushed past the
	// left / top edge of the frame
	scaleXHBox := gtk.NewHBox(false, 1)
	scaleX := gtk.NewHScaleWithRange(-100, float64(cameraWidth), 1)
	scaleX.SetValue(float64(sliderPosX))
	scaleX.Connect("value-changed", func() {
		sliderPosX = int(scaleX.GetValue())
		showPosition()
	})
	scaleXHBox.Add(scaleX)
	framebox1.PackStart(scaleXHBox, false, false, 0)

	scaleYHBox := gtk.NewHBox(false, 1)
	scaleY := gtk.NewHScaleWithRange(-100, float64(cameraHeight), 1)
	scaleY.SetValue(float64(sliderPosY))
	scaleY.Connect("value-changed", func() {
		sliderPosY = int(scaleY.GetValue())
		showPosition()
	})
	scaleYHBox.Add(scaleY)
	framebox1.PackStart(scaleYHBox, false, false, 0)

	vpaned.Pack1(frame1, false, false)

	//--------------------------------------------------------
	// GtkHBox
	//--------------------------------------------------------
	buttons := gtk.NewHBox(false, 1)

	//--------------------------------------------------------
	// GtkButton
	//--------------------------------------------------------
	quitButton := gtk.NewButtonWithLabel("Quit")
	quitButton.Clicked(func() {
		stopCamera = true
		// if use defer above, don't release here
		// if release here, don't use defer
		// otherwise will cause segmentation fault
		// --- webCamera.Release() // don't forget to release !!
		gtk.MainQuit()
	})
	buttons.Add(quitButton)
	framebox1.PackStart(buttons, false, false, 0)

	//--------------------------------------------------------
	// GtkVSeparator
	//--------------------------------------------------------
	vsep := gtk.NewVSeparator()
	framebox1.PackStart(vsep, false, false, 0)

	//--------------------------------------------------------
	// GtkStatusbar
	//--------------------------------------------------------
	statusbar = gtk.NewStatusbar()
	framebox1.PackStart(statusbar, false, false, 0)

	//--------------------------------------------------------
	// Event
	//--------------------------------------------------------
	window.Add(vbox)
	window.SetSizeRequest(600, 128)
	window.ShowAll()
	gtk.Main()
}
References:
https://socketloop.com/tutorials/golang-put-utf8-text-on-opencv-video-capture-image-frame
See also : Golang : Print UTF-8 fonts on image example
By Adam Ng
If you gained some knowledge, or the information here solved your programming problem, please consider donating to the less fortunate or to a charity that you like. Apart from donating, planting trees, volunteering or reducing your carbon footprint would be great too.
Advertisement
Tutorials
+11.8k Golang : Save webcamera frames to video file
+16.5k Golang : Fix cannot convert buffer (type *bytes.Buffer) to type string error
+12.4k Golang : Pass database connection to function called from another package and HTTP Handler
+9.8k Golang : Channels and buffered channels examples
+9.9k Golang : Print how to use flag for your application example
+13.5k Golang : How to determine if a year is leap year?
+7.1k Golang : Individual and total number of words counter example
+18.2k Golang : Logging with logrus
+16.5k Golang : read gzipped http response
+5.7k PHP : Get client IP address
+9k Golang : How to get garbage collection data?
+21.7k Fix "Failed to start php5-fpm.service: Unit php5-fpm.service is masked."