images.go 2.92 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package imageproc

import (
	"image"
	"image/color"

	"golang.org/x/image/draw"
)

// Per-channel mean / standard-deviation presets. Each is a three-element
// array that can be passed as the mean or std argument of Normalize, which
// reads index 0 for red, 1 for green and 2 for blue.
//
// NOTE(review): judging by the names, the values are the usual ImageNet and
// CLIP preprocessing statistics for inputs scaled to [0, 1] — confirm against
// the model configuration that consumes them.
var (
	// ImageNetDefaultMean and ImageNetDefaultSTD form one mean/std pair.
	ImageNetDefaultMean  = [3]float32{0.485, 0.456, 0.406}
	ImageNetDefaultSTD   = [3]float32{0.229, 0.224, 0.225}
	// ImageNetStandardMean and ImageNetStandardSTD use 0.5 for every channel.
	ImageNetStandardMean = [3]float32{0.5, 0.5, 0.5}
	ImageNetStandardSTD  = [3]float32{0.5, 0.5, 0.5}
	// ClipDefaultMean and ClipDefaultSTD form one mean/std pair.
	ClipDefaultMean      = [3]float32{0.48145466, 0.4578275, 0.40821073}
	ClipDefaultSTD       = [3]float32{0.26862954, 0.26130258, 0.27577711}
)

// Resize method identifiers accepted by the method parameter of Resize.
// They are untyped integer constants numbered from 0 via iota; Resize maps
// each one to an interpolator from golang.org/x/image/draw and panics on any
// other value.
const (
	// ResizeBilinear selects draw.BiLinear.
	ResizeBilinear = iota
	// ResizeNearestNeighbor selects draw.NearestNeighbor.
	ResizeNearestNeighbor
	// ResizeApproxBilinear selects draw.ApproxBiLinear.
	ResizeApproxBilinear
	// ResizeCatmullrom selects draw.CatmullRom.
	ResizeCatmullrom
)

// Composite flattens img onto an opaque white canvas, removing any
// transparency. The result is a new *image.RGBA with the same bounds as img;
// img itself is not modified.
func Composite(img image.Image) image.Image {
	bounds := img.Bounds()
	canvas := image.NewRGBA(bounds)

	// Paint the whole canvas opaque white first. Src replaces the
	// (transparent) zero pixels instead of blending with them.
	background := image.NewUniform(color.RGBA{R: 255, G: 255, B: 255, A: 255})
	draw.Draw(canvas, bounds, background, image.Point{}, draw.Src)

	// Alpha-blend the source on top, so transparent areas show the white
	// background.
	draw.Draw(canvas, bounds, img, bounds.Min, draw.Over)

	return canvas
}

// Resize returns a new *image.RGBA of size newSize (origin at 0,0) holding
// img scaled to fill it.
//
// method selects the interpolator and must be one of ResizeBilinear,
// ResizeNearestNeighbor, ResizeApproxBilinear or ResizeCatmullrom. Any other
// value panics with "no resizing method found".
func Resize(img image.Image, newSize image.Point, method int) image.Image {
	// A switch avoids building a lookup map on every call and rejects an
	// unknown method before the destination buffer is allocated.
	var kernel draw.Interpolator
	switch method {
	case ResizeBilinear:
		kernel = draw.BiLinear
	case ResizeNearestNeighbor:
		kernel = draw.NearestNeighbor
	case ResizeApproxBilinear:
		kernel = draw.ApproxBiLinear
	case ResizeCatmullrom:
		kernel = draw.CatmullRom
	default:
		panic("no resizing method found")
	}

	dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))

	// Scale the full source rectangle onto the full destination rectangle.
	kernel.Scale(dst, dst.Rect, img, img.Bounds(), draw.Over, nil)

	return dst
}

// Normalize converts img to a flat slice of float32 colour values, each
// normalized as (value - mean[c]) / std[c], where c is 0 for red, 1 for
// green and 2 for blue. The alpha channel is dropped.
//
// Every channel is first reduced to its 8-bit value (0-255). If rescale is
// true that value is divided by 255 before normalization; otherwise the raw
// 8-bit value is normalized directly.
//
// Pixels are visited row by row, left to right. If channelFirst is true the
// result is planar: all red values, then all green, then all blue.
// Otherwise it is interleaved: r, g, b for the first pixel, then r, g, b for
// the next, and so on.
//
// The result has 3 * width * height elements, or is nil for an empty image.
// NOTE(review): a zero component in std produces Inf/NaN values; callers are
// expected to pass non-zero std.
func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
	bounds := img.Bounds()
	if bounds.Empty() {
		return nil
	}

	// The output size is known up front, so allocate once and write by index
	// rather than growing the slice with append.
	plane := bounds.Dx() * bounds.Dy()
	pixelVals := make([]float32, 3*plane)

	// Dividing by 1 leaves the raw 8-bit value untouched when rescaling is off.
	scale := float32(1)
	if rescale {
		scale = 255.0
	}

	idx := 0 // pixel index in row-major order
	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			// RGBA yields 16-bit channels; >>8 reduces them to 8 bits.
			r, g, b, _ := img.At(x, y).RGBA()

			rVal := (float32(r>>8)/scale - mean[0]) / std[0]
			gVal := (float32(g>>8)/scale - mean[1]) / std[1]
			bVal := (float32(b>>8)/scale - mean[2]) / std[2]

			if channelFirst {
				pixelVals[idx] = rVal
				pixelVals[plane+idx] = gVal
				pixelVals[2*plane+idx] = bVal
			} else {
				base := 3 * idx
				pixelVals[base] = rVal
				pixelVals[base+1] = gVal
				pixelVals[base+2] = bVal
			}
			idx++
		}
	}

	return pixelVals
}