ncnn Having issues trying to tile a large image for scaling

tyg4sfes  于 2023-03-19  发布在  其他
关注(0)|答案(2)|浏览(220)

I'm trying to convert a 1920 x 800 image into a 7680 x 3200 image using super resolution. Since this is a very large conversion, I'm going to break the image up into 8 tiles of up to 480 x 480 each (a 4 x 2 grid; the bottom row is only 320 pixels tall), upscale each tile, and then reassemble the 8 upscaled tiles into the final image.

My code works on a single image that is 480 x 480, but fails on the larger 1920 x 800 image even if I force a "return" after one iteration.

The first set of pictures is when I run my code on small.png (480x480). It extracts it as one tile, scales it up 4x and writes it to a tiled_final_4x.png successfully (1920x1920).

C:\temp>python test.py
        Tile 1/1

The second set of pictures is when I run the code on big.png. It extracts each tile (480x480) successfully, but the 4x scaling results in corrupted (1920x1920) pngs.

C:\temp>python test.py
        Tile 1/8
        Tile 2/8
        Tile 3/8
        Tile 4/8
        Tile 5/8
        Tile 6/8
        Tile 7/8
        Tile 8/8

Does anyone have any ideas on what I'm doing wrong here?

Here is the code test.py:

import cv2
import ncnn
import numpy as np
import math

def tile_process(img, tile_size, scale):
    """Upscale ``img`` tile by tile with the module-level ncnn ``net``.

    The image is cut into a grid of at most ``tile_size`` x ``tile_size``
    tiles (edge tiles may be smaller), each tile is run through the network,
    and the upscaled tiles are pasted into one output canvas.

    As a debugging aid, every input tile is written to ``tile_<y>_<x>.png``
    and every upscaled tile to ``tile_x4_<y>_<x>.png`` in the current
    directory.

    Args:
        img: ``(height, width, channels)`` array, e.g. the result of
            ``cv2.imread``. The tile is handed to ncnn as ``PIXEL_BGR``.
        tile_size: Maximum tile edge length in input pixels.
        scale: Integer upscaling factor of the model.

    Returns:
        A float64 array of shape ``(height * scale, width * scale, channels)``
        holding the network output scaled by 255.

    Raises:
        RuntimeError: If ncnn inference fails for a tile.
    """
    height, width, channels = img.shape
    # start with black image
    output = np.zeros((height * scale, width * scale, channels))

    tiles_x = math.ceil(width / tile_size)
    tiles_y = math.ceil(height / tile_size)
    total_tiles = tiles_x * tiles_y

    # loop over all tiles
    for y in range(tiles_y):
        for x in range(tiles_x):
            tile_idx = y * tiles_x + x + 1

            # input tile area on total image (clamped at the right/bottom edge)
            input_start_x = x * tile_size
            input_end_x = min(input_start_x + tile_size, width)
            input_start_y = y * tile_size
            input_end_y = min(input_start_y + tile_size, height)

            # Slicing returns a strided view into ``img``. Passing that view
            # to ncnn produced corrupted tiles, so force a contiguous buffer
            # first. When the tile is the whole image the slice is already
            # contiguous and no copy is made.
            input_tile = np.ascontiguousarray(
                img[input_start_y:input_end_y, input_start_x:input_end_x, :]
            )

            cv2.imwrite("tile_" + str(y) + "_" + str(x) + ".png", input_tile)

            # Convert image to ncnn Mat
            mat_in = ncnn.Mat.from_pixels(
                input_tile,
                ncnn.Mat.PixelType.PIXEL_BGR,
                input_tile.shape[1],
                input_tile.shape[0],
            )
            # No mean subtraction; only rescale 0-255 pixel values by 1/255.
            mean_vals = []
            norm_vals = [1 / 255.0, 1 / 255.0, 1 / 255.0]
            mat_in.substract_mean_normalize(mean_vals, norm_vals)

            # upscale tile
            try:
                # Make sure the input and output names match the param file
                ex = net.create_extractor()
                ex.input("data", mat_in)
                ret, mat_out = ex.extract("output")
            except RuntimeError as error:
                # Fail loudly: continuing would either hit an undefined
                # result or silently reuse the previous tile's output.
                raise RuntimeError(
                    f"ncnn inference failed on tile {tile_idx}/{total_tiles}"
                ) from error
            if ret != 0:
                raise RuntimeError(
                    f"ncnn extract returned {ret} on tile "
                    f"{tile_idx}/{total_tiles}"
                )
            out = np.array(mat_out)

            # Transpose the output from `c, h, w` to `h, w, c` and put it back in 0-255 range
            print(f"\tTile {tile_idx}/{total_tiles}")
            output_tile = out.transpose(1, 2, 0) * 255
            cv2.imwrite(
                "tile_x4_" + str(y) + "_" + str(x) + ".png", output_tile
            )

            # output tile area on total image
            output_start_x = input_start_x * scale
            output_end_x = input_end_x * scale
            output_start_y = input_start_y * scale
            output_end_y = input_end_y * scale

            # size of the upscaled tile that belongs on the canvas
            output_tile_width = output_end_x - output_start_x
            output_tile_height = output_end_y - output_start_y

            # put tile into output image
            output[
                output_start_y:output_end_y, output_start_x:output_end_x, :
            ] = output_tile[0:output_tile_height, 0:output_tile_width, :]

    return output

# The network is created once at module level; tile_process reads this `net`.
net = ncnn.Net()

# Load model param and bin
net.load_param("x4.param")
net.load_model("x4.bin")

# Load image using opencv
input_path = "big.png"  # use "small.png" for the single-tile 480x480 case
img = cv2.imread(input_path)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface later as an opaque
    # error inside tile_process.
    raise FileNotFoundError(f"Could not read image: {input_path}")

# Upscale in 480-pixel tiles with the 4x model
output = tile_process(img, 480, 4)

cv2.imwrite("tiled_final_4x.png", output)
8i9zcol2

8i9zcol21#

input_tile = img[input_start_y:input_end_y, input_start_x:input_end_x, :]

# make the tile a contiguous array: the slice above is only a strided view into img
input_tile = input_tile.copy()
lyr7nygr

lyr7nygr2#

Thanks, that worked! This is my first time working with image processing.

相关问题