summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h
blob: a61b275fd598076ec2f8d3b932fff6ea59a85b65 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_
#ifdef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_

#include "../pixel.h"
#include "../array2d.h"
#include "../geometry.h"
#include "../image_processing/generic_image.h"

namespace dlib
{

    template <
        unsigned int N
        >
    class pyramid_down : noncopyable
    {
        /*!
            REQUIREMENTS ON N
                N > 0

            WHAT THIS OBJECT REPRESENTS
                This is a simple functor to help create image pyramids.  In particular, it
                downsamples images at a ratio of N to N-1.  That is, each output image is
                roughly (N-1)/N times the size of its input.  For example, pyramid_down<2>
                roughly halves each dimension while pyramid_down<3> produces images that
                are about 2/3 the size of their inputs.

                Note that setting N to 1 means that this object functions like
                pyramid_disable (defined later in this file).

                WARNING, when mapping rectangles from one layer of a pyramid
                to another you might end up with rectangles which extend slightly
                outside your images.  This is because points on the border of an
                image at a higher pyramid layer might correspond to points outside
                images at lower layers.  So just keep this in mind.  Note also
                that it's easy to deal with.  Just say something like this:
                    rect = rect.intersect(get_rect(my_image)); // keep rect inside my_image
        !*/
    public:

        template <
            typename in_image_type,
            typename out_image_type
            >
        void operator() (
            const in_image_type& original,
            out_image_type& down
        ) const;
        /*!
            requires
                - is_same_object(original, down) == false
                - in_image_type == an image object that implements the interface defined in
                  dlib/image_processing/generic_image.h
                - out_image_type == an image object that implements the interface defined in
                  dlib/image_processing/generic_image.h
                - for both pixel types P in the input and output images, we require:
                    - pixel_traits<P>::has_alpha == false
            ensures
                - #down will contain an image that is roughly (N-1)/N times the size of the
                  original image.
                - If both input and output images contain RGB pixels then the downsampled image will
                  be in color.  Otherwise, the downsampling will be performed in a grayscale mode.
                - The location of a point P in the original image will show up at point
                  point_down(P) in the #down image.
                - Note that some points on the border of the original image might correspond to
                  points outside the #down image.
        !*/

        template <
            typename image_type
            >
        void operator() (
            image_type& img
        ) const;
        /*!
            requires
                - image_type == an image object that implements the interface defined in
                  dlib/image_processing/generic_image.h
                - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
            ensures
                - This function downsamples the given image in place and stores the results
                  in #img.  In particular, for a temporary image temp, it is equivalent to
                  performing:
                    (*this)(img, temp);
                    swap(img, temp);
        !*/

    // -------------------------------

        template <typename T>
        vector<double,2> point_down (
            const vector<T,2>& p
        ) const;
        /*!
            ensures
                - interprets p as a point in a parent image and returns the
                  point in a downsampled image which corresponds to p.
                - This function is the inverse of point_up().  I.e. for a point P:
                  point_down(point_up(P)) == P
        !*/

        template <typename T>
        vector<double,2> point_up (
            const vector<T,2>& p
        ) const;
        /*!
            ensures
                - interprets p as a point in a downsampled image and returns the
                  point in a parent image which corresponds to p.
                - This function is the inverse of point_down().  I.e. for a point P:
                  point_up(point_down(P)) == P
        !*/

        drectangle rect_down (
            const drectangle& rect
        ) const;
        /*!
            ensures
                - returns drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner()));
                  (i.e. maps rect from a parent image into a downsampled image)
        !*/

        drectangle rect_up (
            const drectangle& rect
        ) const;
        /*!
            ensures
                - returns drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner()));
                  (i.e. maps rect from a downsampled image into a parent image)
        !*/

    // -------------------------------

        template <typename T>
        vector<double,2> point_down (
            const vector<T,2>& p,
            unsigned int levels
        ) const;
        /*!
            ensures
                - applies point_down() to p levels times and returns the result.
                  (i.e. point_down(p,2) == point_down(point_down(p)),
                        point_down(p,1) == point_down(p),
                        point_down(p,0) == p,  etc. )
        !*/

        template <typename T>
        vector<double,2> point_up (
            const vector<T,2>& p,
            unsigned int levels
        ) const;
        /*!
            ensures
                - applies point_up() to p levels times and returns the result.
                  (i.e. point_up(p,2) == point_up(point_up(p)),
                        point_up(p,1) == point_up(p),
                        point_up(p,0) == p,  etc. )
        !*/

        drectangle rect_down (
            const drectangle& rect,
            unsigned int levels
        ) const;
        /*!
            ensures
                - returns drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels));
                  (i.e. Basically applies rect_down() to rect levels times and returns the result.)
        !*/

        drectangle rect_up (
            const drectangle& rect,
            unsigned int levels
        ) const;
        /*!
            ensures
                - returns drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels));
                  (i.e. Basically applies rect_up() to rect levels times and returns the result.)
        !*/

    };

// ----------------------------------------------------------------------------------------

    class pyramid_disable : noncopyable
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is a function object with an interface identical to pyramid_down (defined
                at the top of this file) except that it downsamples images at a ratio of infinity
                to 1.  That means it always outputs images of size 0 regardless of the size
                of the inputs.

                Since the interface is identical to pyramid_down's, the individual member
                declarations are not repeated in this specification; refer to pyramid_down
                for them.

                This is useful because it can be supplied to routines which take a pyramid_down
                function object and it will essentially disable pyramid processing.  This way,
                a pyramid oriented function can be turned into a regular routine which processes
                just the original undownsampled image.
        !*/
    };

// ----------------------------------------------------------------------------------------

    template <
        unsigned int N
        >
    double pyramid_rate(
        const pyramid_down<N>& pyr
    );
    /*!
        ensures
            - returns (N-1.0)/N
              (i.e. the approximate factor by which pyr scales the size of an image each
              time it is applied, since pyr's output is roughly (N-1)/N times the size of
              its input.  The result depends only on the template parameter N.)
    !*/

// ----------------------------------------------------------------------------------------

    template <
        unsigned int N
        >
    void find_pyramid_down_output_image_size(
        const pyramid_down<N>& pyr,
        long& nr,
        long& nc
    );
    /*!
        requires
            - nr >= 0
            - nc >= 0
        ensures
            - Computes the size of the image that pyr() would output if it were called on
              an input image with nr rows and nc columns.
            - nr and nc are in/out arguments: on entry they hold the input image's row and
              column counts, and the output image's row and column counts are stored back
              into #nr and #nc.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename pyramid_type,
        typename image_type1,
        typename image_type2
        >
    void create_tiled_pyramid (
        const image_type1& img,
        image_type2& out_img,
        std::vector<rectangle>& rects,
        const unsigned long padding = 10,
        const unsigned long outer_padding = 0
    );
    /*!
        requires
            - pyramid_type == one of the dlib::pyramid_down template instances defined above.
              (pyramid_type does not appear in the parameter list, so it must be given
              explicitly by the caller, while image_type1 and image_type2 are deduced from
              img and out_img.)
            - is_same_object(img, out_img) == false
            - image_type1 == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h
            - image_type2 == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h
            - for both pixel types P in the input and output images, we require:
                - pixel_traits<P>::has_alpha == false
        ensures
            - Creates an image pyramid from the input image img.  The pyramid is made using
              pyramid_type.  The highest resolution image is img and then all further
              pyramid levels are generated from pyramid_type's downsampling.  The entire
              resulting pyramid is packed into a single image and stored in #out_img.
            - When packing pyramid levels into #out_img, there will be padding pixels (i.e.
              the value of the padding argument) of space between each sub-image.  There
              will also be outer_padding pixels of padding around the edge of the image.
              All padding pixels have a value of 0.
            - The resulting pyramid will be composed of #rects.size() images packed into
              #out_img.  Moreover, #rects[i] is the location inside #out_img of the i-th
              pyramid level.
            - #rects.size() > 0
            - #rects[0] == get_rect(img).  I.e. the first rectangle is the highest
              resolution pyramid layer.  Subsequent elements of #rects correspond to
              smaller and smaller pyramid layers inside #out_img.
              NOTE(review): since #rects[i] are locations inside #out_img and outer_padding
              pixels surround the packed pyramid, this equality presumably holds exactly
              only when outer_padding == 0 -- confirm against the implementation.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename pyramid_type
        >
    dpoint image_to_tiled_pyramid (
        const std::vector<rectangle>& rects,
        double scale,
        dpoint p
    );
    /*!
        requires
            - pyramid_type == one of the dlib::pyramid_down template instances defined above.
            - 0 < scale <= 1
            - rects.size() > 0
        ensures
            - The function create_tiled_pyramid() converts an image, img, to a "tiled
              pyramid" called out_img.  It also outputs a vector of rectangles, rects, that
              shows where each pyramid layer appears in out_img.  Therefore,
              image_to_tiled_pyramid() allows you to map from coordinates in img (i.e. p)
              to coordinates in the tiled pyramid out_img, when given the rects metadata.

              So given a point p in img, you can ask, what coordinate in out_img
              corresponds to img[p.y()][p.x()] when things are scale times smaller?  This
              new coordinate is a location in out_img and is what is returned by this
              function.
            - A scale of 1 means we don't move anywhere in the pyramid scale space relative
              to the input image while smaller values of scale mean we move down the
              pyramid.
            - Assumes pyramid_type is the pyramid class used to produce the tiled image.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename pyramid_type
        >
    drectangle image_to_tiled_pyramid (
        const std::vector<rectangle>& rects,
        double scale,
        drectangle r
    );
    /*!
        requires
            - pyramid_type == one of the dlib::pyramid_down template instances defined above.
            - 0 < scale <= 1
            - rects.size() > 0
        ensures
            - This function maps from input image space to tiled pyramid coordinate space
              just as the dpoint overload of image_to_tiled_pyramid() does, except it
              operates on rectangle objects instead of points.  That is, r is a rectangle
              in the input image and the returned rectangle is in the tiled pyramid image.
            - Assumes pyramid_type is the pyramid class used to produce the tiled image.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename pyramid_type
        >
    dpoint tiled_pyramid_to_image (
        const std::vector<rectangle>& rects,
        dpoint p
    );
    /*!
        requires
            - pyramid_type == one of the dlib::pyramid_down template instances defined above.
            - rects.size() > 0
        ensures
            - This function maps from a coordinate in a tiled pyramid (i.e. p) to the
              corresponding input image coordinate.  Therefore, it is essentially the
              inverse of image_to_tiled_pyramid().
            - It should be noted that this function isn't always an inverse of
              image_to_tiled_pyramid().  This is because you can ask
              image_to_tiled_pyramid() for the coordinates of points outside the input
              image and they will be mapped to somewhere that doesn't have an inverse.  But
              for points actually inside the image this function performs an approximate
              inverse mapping.
            - Assumes pyramid_type is the pyramid class used to produce the tiled image.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename pyramid_type
        >
    drectangle tiled_pyramid_to_image (
        const std::vector<rectangle>& rects,
        drectangle r 
    );
    /*!
        requires
            - pyramid_type == one of the dlib::pyramid_down template instances defined above.
            - rects.size() > 0
        ensures
            - This function maps from a rectangle in a tiled pyramid (i.e. r) to the
              corresponding rectangle in the input image.  It is the rectangle counterpart
              of the dpoint overload of tiled_pyramid_to_image() and is therefore
              essentially the inverse of image_to_tiled_pyramid().
            - It should be noted that this function isn't always an inverse of
              image_to_tiled_pyramid().  This is because you can ask
              image_to_tiled_pyramid() for the coordinates of points outside the input
              image and they will be mapped to somewhere that doesn't have an inverse.  But
              for rectangles actually inside the image this function performs an approximate
              inverse mapping.
            - Assumes pyramid_type is the pyramid class used to produce the tiled image.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_