mirror of
https://github.com/physwizz/a155-U-u1.git
synced 2025-09-26 19:04:54 +00:00
4815 lines
170 KiB
C
4815 lines
170 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2020 MediaTek Inc.
|
|
*
|
|
* Author: Fish Wu <fish.wu@mediatek.com>
|
|
*
|
|
*/
|
|
|
|
#include "mtk_aie.h"
|
|
#include <linux/delay.h>
|
|
#include <linux/firmware.h>
|
|
#include <linux/device.h>
|
|
#include <linux/dma-heap.h>
|
|
#include "mtk_heap.h"
|
|
#include <uapi/linux/dma-heap.h>
|
|
#include <linux/scatterlist.h>
|
|
#include <linux/soc/mediatek/mtk-cmdq-ext.h>
|
|
#include <aie_mp_fw/config/dma_def.h>
|
|
#include <aie_mp_fw/kernel/dma_def.h>
|
|
#include <aie_mp_fw/all_header.h>
|
|
#include "cmdq-sec.h"
|
|
#include "cmdq-sec-iwc-common.h"
|
|
|
|
|
|
/*
 * Build-time switches and constants for this file.
 * FDVT_USE_GCE, FLD, FLD_ALIGN and CHECK_SERVICE_0 are consumed outside the
 * part of the file reviewed here.
 */
#define FDVT_USE_GCE	1
#define FLD
#define FLD_ALIGN	128
#define CHECK_SERVICE_0	0
/* Name given to every dma-buf allocated by aie_imem_sec_alloc(). */
#define BUFTAG		"AIE"
|
|
//#include <mtkcam-hwcore/imgsys/inc/drv/gce/mt6983/gce_module.h>
|
|
|
|
struct cmdq_pkt *g_sec_pkt;
|
|
|
|
/*
 * FD mode: per-loop enable flag for each output WDMA channel,
 * indexed [loop][channel].  aie_init_table() only fills
 * st_info.fd_wdma_size[loop][channel] for channels enabled here.
 */
static const unsigned int fd_wdma_en[fd_loop_num][output_WDMA_WRA_num] = {
	{1, 0, 0, 0}, {1, 0, 1, 0}, {1, 0, 1, 0}, {1, 0, 0, 0}, {1, 1, 1, 1},
	{1, 1, 1, 1}, {1, 0, 0, 0}, {1, 0, 1, 0}, {1, 1, 0, 0}, {1, 0, 0, 0},
	{1, 0, 1, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 0, 0},
	{1, 1, 0, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 1, 0}, {1, 0, 1, 0}, {1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1},
	{1, 0, 0, 0}, {1, 0, 1, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 1, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 1, 0},
	{1, 0, 1, 0}, {1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 0, 0, 0},
	{1, 0, 1, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 1, 0}, {1, 1, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 0, 0, 0},
	{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}
};
|
|
|
|
/*
 * FD mode: per-loop, per-output-channel stride selector, indexed
 * [loop][channel].  Its consumers lie outside the part of the file
 * reviewed here.
 */
static const unsigned int out_stride_size[fd_loop_num][output_WDMA_WRA_num] = {
	{1, 0, 0, 0}, {1, 0, 2, 0}, {1, 0, 2, 0}, {1, 0, 0, 0}, {1, 1, 2, 2},
	{1, 1, 2, 2}, {1, 0, 0, 0}, {1, 0, 2, 0}, {1, 1, 0, 0}, {1, 0, 0, 0},
	{1, 0, 2, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 0, 0},
	{1, 1, 0, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {3, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 2, 0}, {1, 0, 2, 0}, {1, 0, 0, 0}, {1, 1, 2, 2}, {1, 1, 2, 2},
	{1, 0, 0, 0}, {1, 0, 2, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 2, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {3, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 2, 0},
	{1, 0, 2, 0}, {1, 0, 0, 0}, {1, 1, 2, 2}, {1, 1, 2, 2}, {1, 0, 0, 0},
	{1, 0, 2, 0}, {1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 2, 0}, {1, 1, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 0, 0, 0},
	{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 0, 0}, {1, 1, 0, 0}, {1, 1, 0, 0},
	{1, 0, 0, 0}, {3, 0, 0, 0}
};
|
|
|
|
/*
 * FD mode: kernel RDMA size for each loop and kernel channel, indexed
 * [loop][kernel channel].  aie_get_data_size() adds up every entry to
 * obtain fd->fd_fd_kernel_size.
 */
static const unsigned int fd_ker_rdma_size[fd_loop_num][kernel_RDMA_RA_num] = {
	{240, 240}, {1168, 1168}, {1168, 1168}, {272, 272}, {2320, 2320},
	{2080, 2080}, {1040, 1040}, {4624, 4624}, {3104, 3104}, {9232, 9232},
	{4624, 4624}, {4128, 4128}, {1040, 1040}, {4624, 4624}, {4624, 4624},
	{1552, 1552}, {4624, 4624}, {4624, 4624}, {4128, 4128}, {1040, 1040},
	{1040, 1040}, {528, 528}, {4160, 4160}, {4160, 4160}, {2080, 2080},
	{2080, 2080}, {2080, 2080}, {1040, 1040}, {0, 0}, {240, 240},
	{1168, 1168}, {1168, 1168}, {272, 272}, {2320, 2320}, {2080, 2080},
	{1040, 1040}, {4624, 4624}, {3104, 3104}, {9232, 9232}, {4624, 4624},
	{4128, 4128}, {1040, 1040}, {4624, 4624}, {4624, 4624}, {1552, 1552},
	{4624, 4624}, {4624, 4624}, {4128, 4128}, {1040, 1040}, {1040, 1040},
	{528, 528}, {4160, 4160}, {4160, 4160}, {2080, 2080}, {2080, 2080},
	{2080, 2080}, {1040, 1040}, {0, 0}, {240, 240}, {1168, 1168},
	{1168, 1168}, {272, 272}, {2320, 2320}, {2080, 2080}, {1040, 1040},
	{4624, 4624}, {3104, 3104}, {9232, 9232}, {4624, 4624}, {4128, 4128},
	{1040, 1040}, {4624, 4624}, {4624, 4624}, {1552, 1552}, {4624, 4624},
	{4624, 4624}, {4128, 4128}, {1040, 1040}, {1040, 1040}, {528, 528},
	{4160, 4160}, {4160, 4160}, {2080, 2080}, {2080, 2080}, {2080, 2080},
	{1040, 1040}, {0, 0}
};
|
|
|
|
/*
 * FD mode, one flag per loop: when 1, aie_init_table()/aie_update_table()
 * take the loop's input size from the previous loop's stride-2 output
 * (stride2_out_*) instead of its regular output (out_*).
 */
static const unsigned int fd_out_stride2_in[fd_loop_num] = {
	0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
	0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
|
|
/*
 * FD mode, one value per loop: stride term of the divisor that turns a
 * loop's input size into its output size in aie_init_table() and
 * aie_update_table().
 */
static const unsigned int fd_stride[fd_loop_num] = {
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
|
|
|
|
/*
 * FD mode, one flag per loop: max-pool term of the output-size divisor
 * used by aie_init_table() and aie_update_table() (contributes
 * 2 * fd_maxpool[loop]).
 */
static const unsigned int fd_maxpool[fd_loop_num] = {
	0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
|
|
/*
 * FD mode, one flag per loop: multiplies the stride-2 output sizes in
 * aie_init_table()/aie_update_table(), so 0 zeroes them; when 1 the
 * stride-2 output height is also halved (rounded up).
 */
static const unsigned int out_2size[fd_loop_num] = {
	0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1,
	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
|
|
/*
 * FD mode, one value per loop: input channel packing factor.  The table
 * code sets input_xsize_plus_1 to img_width * in_ch_pack, except that a
 * value of 1 selects round_up(img_width, 8) instead.
 */
static const unsigned int in_ch_pack[fd_loop_num] = {
	1, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 1, 16, 16, 16, 16, 16, 32,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 0, 1, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0
};
|
|
|
|
/*
 * FD mode, one flag per loop: 1 marks a loop that the table code treats
 * as an output layer (output x-size doubled, stride scaled by
 * anchor_en_num, WDMA channels 1..3 sized like channel 0).
 */
static const unsigned int outlayer[fd_loop_num] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
|
|
/*
 * FD mode, one value per loop: output channel packing factor; multiplied
 * with the output width to obtain out_xsize_plus_1 and its stride-2
 * counterpart.
 */
static const unsigned int out_ch_pack[fd_loop_num] = {
	16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 16, 16, 16, 32, 32, 32, 32, 32, 32, 0, 16,
	16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 16, 16, 16, 32, 32, 32, 32, 32, 32, 0, 16, 16,
	16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 16, 16, 16, 32, 32, 32, 32, 32, 32, 0
};
|
|
|
|
/*
 * FD mode, one value per loop: factor applied to out_xsize_plus_1 when
 * computing out_stride for loops flagged in outlayer[].
 */
static const unsigned int anchor_en_num[fd_loop_num] = {
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};
|
|
|
|
/* [loop][ch][output_index] */
|
|
static const signed int fd_rdma_en[fd_loop_num][input_WDMA_WRA_num][2] = {
|
|
{{99, 99}, {99, 99}, {99, 99}, {-1, -1} },
|
|
{{0, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{1, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{1, 0}, {2, 0}, {-1, -1}, {-1, -1} },
|
|
{{3, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{1, 2}, {2, 2}, {4, 2}, {4, 3} },
|
|
{{5, 0}, {5, 1}, {-1, -1}, {-1, -1} },
|
|
{{6, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{5, 0}, {5, 1}, {7, 0}, {-1, -1} },
|
|
{{8, 0}, {8, 1}, {-1, -1}, {-1, -1} },
|
|
{{9, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{5, 2}, {5, 3}, {7, 2}, {10, 2} },
|
|
{{11, 0}, {11, 1}, {-1, -1}, {-1, -1} },
|
|
{{12, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{13, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{11, 0}, {11, 1}, {14, 0}, {-1, -1} },
|
|
{{15, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{16, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{11, 0}, {11, 1}, {14, 0}, {17, 0} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {18, 1}, {-1, -1}, {-1, -1} },
|
|
{{19, 0}, {22, 0}, {22, 1}, {25, 0} },
|
|
{{99, 99}, {99, 99}, {99, 99}, {-1, -1} },
|
|
{{29, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{30, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{30, 0}, {31, 0}, {-1, -1}, {-1, -1} },
|
|
{{32, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{30, 2}, {31, 2}, {33, 2}, {33, 3} },
|
|
{{34, 0}, {34, 1}, {-1, -1}, {-1, -1} },
|
|
{{35, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{34, 0}, {34, 1}, {36, 0}, {-1, -1} },
|
|
{{37, 0}, {37, 1}, {-1, -1}, {-1, -1} },
|
|
{{38, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{34, 2}, {34, 3}, {36, 2}, {39, 2} },
|
|
{{40, 0}, {40, 1}, {-1, -1}, {-1, -1} },
|
|
{{41, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{42, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{40, 0}, {40, 1}, {43, 0}, {-1, -1} },
|
|
{{44, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{45, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{40, 0}, {40, 1}, {43, 0}, {46, 0} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{47, 0}, {47, 1}, {-1, -1}, {-1, -1} },
|
|
{{48, 0}, {51, 0}, {51, 1}, {54, 0} },
|
|
{{99, 99}, {99, 99}, {99, 99}, {-1, -1} },
|
|
{{58, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{59, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{59, 0}, {60, 0}, {-1, -1}, {-1, -1} },
|
|
{{61, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{59, 2}, {60, 2}, {62, 2}, {62, 3} },
|
|
{{63, 0}, {63, 1}, {-1, -1}, {-1, -1} },
|
|
{{64, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{63, 0}, {63, 1}, {65, 0}, {-1, -1} },
|
|
{{66, 0}, {66, 1}, {-1, -1}, {-1, -1} },
|
|
{{67, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{63, 2}, {63, 3}, {65, 2}, {68, 2} },
|
|
{{69, 0}, {69, 1}, {-1, -1}, {-1, -1} },
|
|
{{70, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{71, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{69, 0}, {69, 1}, {72, 0}, {-1, -1} },
|
|
{{73, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{74, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{69, 0}, {69, 1}, {72, 0}, {75, 0} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{76, 0}, {76, 1}, {-1, -1}, {-1, -1} },
|
|
{{77, 0}, {80, 0}, {80, 1}, {83, 0} } };
|
|
|
|
/*
 * Attribute mode: per-loop enable flag for each output WDMA channel,
 * indexed [loop][channel].  aie_get_data_size() only counts buffer space
 * for channels enabled here.
 */
static const unsigned int attr_wdma_en[attr_loop_num][output_WDMA_WRA_num] = {
	{1, 0, 1, 0}, {1, 0, 1, 0}, {1, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1},
	{1, 0, 1, 0}, {1, 1, 0, 0}, {1, 0, 1, 0}, {1, 1, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 1, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0}, {1, 0, 0, 0},
	{1, 0, 0, 0}
};
|
|
static const unsigned int
|
|
attr_ker_rdma_size[attr_loop_num][kernel_RDMA_RA_num] = {
|
|
{240, 240}, {1168, 1168}, {272, 272}, {2320, 2320},
|
|
{2080, 2080}, {9232, 9232}, {3104, 3104}, {9232, 9232},
|
|
{4128, 4128}, {1040, 1040}, {4624, 4624}, {4624, 4624},
|
|
{1552, 1552}, {4624, 4624}, {4624, 4624}, {4128, 4128},
|
|
{9232, 9232}, {272, 272}, {9232, 9232}, {2320, 2320},
|
|
{144, 144}, {9232, 9232}, {272, 272}, {9232, 9232},
|
|
{2320, 2320}, {144, 144} };
|
|
/*
 * Attribute mode, one flag per loop.  By name the counterpart of
 * fd_out_stride2_in[]; its consumers lie outside the part of the file
 * reviewed here.
 */
static const unsigned int attr_out_stride2_as_in[attr_loop_num] = {
	0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
/* Attribute mode, one stride value per loop ("H" in the original note). */
static const unsigned int attr_fd_stride[attr_loop_num] = {
	2, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1
};
|
|
/* Attribute mode, one max-pool flag per loop ("L" in the original note). */
static const unsigned int attr_fd_maxpool[attr_loop_num] = {
	1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0
};
|
|
/* Attribute mode, one stride-2 output flag per loop ("O" in the original note). */
static const unsigned int attr_out_2size[attr_loop_num] = {
	1, 1, 0, 1, 1, 1, 0,
	1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0
};
|
|
/* Attribute mode, input channel packing factor for each loop. */
static const unsigned int attr_input_ch_pack[attr_loop_num] = {
	1, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 16
};
|
|
/* [loop][ch][output_index] */
|
|
static const signed int attr_rdma_en[attr_loop_num][input_WDMA_WRA_num][2] = {
|
|
{{99, 99}, {99, 99}, {99, 99}, {-1, -1} },
|
|
{{0, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{0, 0}, {1, 0}, {-1, -1}, {-1, -1} },
|
|
{{2, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{0, 2}, {1, 2}, {3, 2}, {3, 3} },
|
|
{{4, 0}, {4, 1}, {-1, -1}, {-1, -1} },
|
|
{{4, 0}, {4, 1}, {5, 0}, {-1, -1} },
|
|
{{6, 0}, {6, 1}, {-1, -1}, {-1, -1} },
|
|
{{4, 2}, {4, 3}, {5, 2}, {7, 2} },
|
|
{{8, 0}, {8, 1}, {-1, -1}, {-1, -1} },
|
|
{{9, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{10, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{8, 0}, {8, 1}, {11, 0}, {-1, -1} },
|
|
{{12, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{13, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{8, 0}, {8, 1}, {11, 0}, {14, 0} },
|
|
{{15, 0}, {15, 1}, {-1, -1}, {-1, -1} },
|
|
{{16, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{15, 0}, {15, 1}, {-1, -1}, {-1, -1} },
|
|
{{18, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{19, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{15, 0}, {15, 1}, {-1, -1}, {-1, -1} },
|
|
{{21, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{15, 0}, {15, 1}, {-1, -1}, {-1, -1} },
|
|
{{23, 0}, {-1, -1}, {-1, -1}, {-1, -1} },
|
|
{{24, 0}, {-1, -1}, {-1, -1}, {-1, -1} } };
|
|
|
|
/*
 * Attribute mode: output size of every WDMA channel of every loop,
 * indexed [loop][channel].  aie_get_data_size() adds the entries of the
 * enabled, non-result channels into fd->fd_attr_dma_max_size.
 */
static const unsigned int attr_wdma_size[attr_loop_num][output_WDMA_WRA_num] = {
	{16384, 0, 4096, 0},
	{16384, 0, 4096, 0},
	{16384, 0, 0, 0},
	{16384, 16384, 4096, 4096},
	{8192, 8192, 2048, 2048},
	{8192, 0, 2048, 0},
	{8192, 8192, 0, 0},
	{8192, 0, 2048, 0},
	{2048, 2048, 0, 0},
	{2048, 0, 0, 0},
	{2048, 0, 0, 0},
	{2048, 0, 0, 0},
	{2048, 0, 0, 0},
	{2048, 0, 0, 0},
	{2048, 0, 0, 0},
	{2048, 2048, 0, 0},
	{2048, 0, 0, 0},
	{0, 0, 0, 0},
	{2048, 0, 0, 0},
	{1024, 0, 0, 0},
	{0, 0, 0, 0},
	{2048, 0, 0, 0},
	{0, 0, 0, 0},
	{2048, 0, 0, 0},
	{1024, 0, 0, 0},
	{0, 0, 0, 0}
};
|
|
/*
 * FLD buffer sizes (128-bits ALIGN work-around).
 * Each padded size is its *_non_align value rounded up to the next
 * multiple of 128: size + (128 - (size % 128)) % 128.
 */
#define fld_blink_weight_size		6528	/* 6416 rounded up */
#define fld_blink_weight_size_non_align	6416
#define fld_cv_size			1280
#define fld_cv_size_00			1536
#define fld_cv_size_00_non_align	1472
#define fld_fp_size			5376	/* 5344 rounded up */
#define fld_fp_size_non_align		5344
#define fld_leafnode_size		307200
#define fld_tree_size			8064	/* 8000 rounded up */
#define fld_tree_size_non_align		8000
#define fld_result_size			112
#define fld_forest			14
#define fld_point			500
#define fld_cur_landmark		11
/* Guards the blocks in this file that are compiled out (value 0). */
#define CHECK_SERVICE_IF_0		0
|
|
|
|
#if CHECK_SERVICE_IF_0
|
|
int FDVT_M4U_TranslationFault_callback(int port,
|
|
unsigned int mva,
|
|
void *data)
|
|
{
|
|
pr_info("[FDVT_M4U]fault call port=%d, mva=0x%x", port, mva);
|
|
|
|
switch (port) {
|
|
#if CHECK_SERVICE_IF_0
|
|
case M4U_PORT_FDVT_RDA:
|
|
case M4U_PORT_FDVT_RDB:
|
|
case M4U_PORT_FDVT_WRA:
|
|
case M4U_PORT_FDVT_WRB:
|
|
#endif
|
|
default: //ISP_FDVT_BASE = 0x1b001000
|
|
pr_info("FDVT_IN_BASE_ADR_0:0x%08x, FDVT_IN_BASE_ADR_1:0x%08x, FDVT_IN_BASE_ADR_2:0x%08x, FDVT_IN_BASE_ADR_3:0x%08x\n",
|
|
FDVT_RD32(FDVT_IN_BASE_ADR_0_REG),
|
|
FDVT_RD32(FDVT_IN_BASE_ADR_1_REG),
|
|
FDVT_RD32(FDVT_IN_BASE_ADR_2_REG),
|
|
FDVT_RD32(FDVT_IN_BASE_ADR_3_REG));
|
|
pr_info("FDVT_OUT_BASE_ADR_0:0x%08x, FDVT_OUT_BASE_ADR_1:0x%08x, FDVT_OUT_BASE_ADR_2:0x%08x, FDVT_OUT_BASE_ADR_3:0x%08x\n",
|
|
FDVT_RD32(FDVT_OUT_BASE_ADR_0_REG),
|
|
FDVT_RD32(FDVT_OUT_BASE_ADR_1_REG),
|
|
FDVT_RD32(FDVT_OUT_BASE_ADR_2_REG),
|
|
FDVT_RD32(FDVT_OUT_BASE_ADR_3_REG));
|
|
pr_info("FDVT_KERNEL_BASE_ADR_0:0x%08x, FDVT_KERNEL_BASE_ADR_1:0x%08x\n",
|
|
FDVT_RD32(FDVT_KERNEL_BASE_ADR_0_REG),
|
|
FDVT_RD32(FDVT_KERNEL_BASE_ADR_1_REG));
|
|
break;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
#endif
|
|
static void aie_free_dmabuf(struct mtk_aie_dev *fd, struct imem_buf_info *bufinfo)
|
|
{
|
|
if (bufinfo->dmabuf) {
|
|
dma_heap_buffer_free(bufinfo->dmabuf);
|
|
bufinfo->dmabuf = NULL;
|
|
}
|
|
}
|
|
|
|
static void aie_free_iova(struct mtk_aie_dev *fd, struct imem_buf_info *bufinfo)
|
|
{
|
|
if (bufinfo->pa) {
|
|
/*free iova*/
|
|
dma_buf_unmap_attachment(bufinfo->attach, bufinfo->sgt, DMA_BIDIRECTIONAL);
|
|
dma_buf_detach(bufinfo->dmabuf, bufinfo->attach);
|
|
bufinfo->pa = 0;
|
|
}
|
|
}
|
|
|
|
static void aie_free_va(struct mtk_aie_dev *fd, struct imem_buf_info *bufinfo)
|
|
{
|
|
if (bufinfo->va) {
|
|
dma_buf_vunmap(bufinfo->dmabuf, bufinfo->va);
|
|
bufinfo->va = NULL;
|
|
}
|
|
}
|
|
|
|
/*
 * aie_imem_sec_alloc() - allocate a dma-buf from a MediaTek dma-heap.
 * @fd:       AIE device, used for logging.
 * @size:     number of bytes to allocate.
 * @IsSecure: true selects the "mtk_prot_region" heap, false the
 *            "mtk_mm-uncached" heap.
 *
 * The heap reference is dropped again as soon as the allocation attempt
 * has been made; on success the buffer is named BUFTAG.
 *
 * Return: the new dma-buf, or NULL if the heap is missing or the
 * allocation failed.
 */
struct dma_buf *aie_imem_sec_alloc(struct mtk_aie_dev *fd, u32 size, bool IsSecure)
{
	const char *heap_name = IsSecure ? "mtk_prot_region" : "mtk_mm-uncached";
	struct dma_heap *heap;
	struct dma_buf *buf;

	heap = dma_heap_find(heap_name);
	if (!heap) {
		dev_info(fd->dev, "heap find fail\n");
		return NULL;
	}

	buf = dma_heap_buffer_alloc(heap, size, O_RDWR | O_CLOEXEC,
				    DMA_HEAP_VALID_HEAP_FLAGS);
	dma_heap_put(heap);

	if (IS_ERR(buf)) {
		dev_info(fd->dev, "buffer alloc fail\n");
		return NULL;
	}

	mtk_dma_buf_set_name(buf, BUFTAG);

	return buf;
}
|
|
|
|
unsigned long long aie_get_sec_iova(struct mtk_aie_dev *fd, struct dma_buf *my_dma_buf,
|
|
struct imem_buf_info *bufinfo)
|
|
{
|
|
struct dma_buf_attachment *attach;
|
|
unsigned long long iova = 0;
|
|
struct sg_table *sgt;
|
|
|
|
attach = dma_buf_attach(my_dma_buf, fd->dev);
|
|
if (IS_ERR(attach)) {
|
|
dev_info(fd->dev, "attach fail, return\n");
|
|
return 0;
|
|
}
|
|
bufinfo->attach = attach;
|
|
|
|
sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
|
|
if (IS_ERR(sgt)) {
|
|
dev_info(fd->dev, "map failed, detach and return\n");
|
|
dma_buf_detach(my_dma_buf, attach);
|
|
return 0;
|
|
}
|
|
bufinfo->sgt = sgt;
|
|
|
|
iova = sg_dma_address(sgt->sgl);
|
|
|
|
return iova;
|
|
}
|
|
|
|
void *aie_get_va(struct mtk_aie_dev *fd, struct dma_buf *my_dma_buf)
|
|
{
|
|
void *buf_ptr = dma_buf_vmap(my_dma_buf);
|
|
|
|
if (!buf_ptr) {
|
|
dev_info(fd->dev, "map failed\n");
|
|
return NULL;
|
|
}
|
|
return buf_ptr;
|
|
}
|
|
#if CHECK_SERVICE_IF_0
|
|
/*
 * aie_imem_alloc() - allocate a coherent DMA buffer and record it.
 * @fd:      AIE device; fd->dev owns the allocation.
 * @size:    number of bytes to allocate.
 * @bufinfo: descriptor that receives va, pa and size on success.
 *
 * The log line had mismatched printk arguments (a pointer passed to
 * %llx, a u32 passed to %d, and a literal "0x" in front of %pad, which
 * prints its own prefix); these are fixed here.
 *
 * Return: 0 on success, -ENOMEM if the allocation failed.
 */
static int aie_imem_alloc(struct mtk_aie_dev *fd, u32 size,
			  struct imem_buf_info *bufinfo)
{
	struct device *dev = fd->dev;
	dma_addr_t dma_handle;
	void *va;

	va = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
	if (!va)
		return -ENOMEM;

	bufinfo->va = va;
	bufinfo->pa = dma_handle;
	bufinfo->size = size;

	dev_info(fd->dev, "%s: vAddr(0x%p)(0x%llx), pAddr(%pad), size(%u)\n",
		 __func__, va, (unsigned long long)(uintptr_t)va,
		 &dma_handle, size);

	return 0;
}
|
|
|
|
static void aie_imem_free(struct mtk_aie_dev *fd, struct imem_buf_info *bufinfo)
|
|
{
|
|
dev_info(fd->dev, "%s: vAddr(0x%p)(0x%llx), pAddr(0x%p), size(%d)\n",
|
|
__func__, bufinfo->va, (u64 *)bufinfo->va, bufinfo->pa,
|
|
bufinfo->size);
|
|
|
|
if (bufinfo->size != 0) {
|
|
dma_free_coherent(fd->dev, bufinfo->size, bufinfo->va, bufinfo->pa);
|
|
bufinfo->size = 0;
|
|
}
|
|
}
|
|
#endif
|
|
/* Defined further down in this file; declared here so it can be reused. */
static void aie_update_table(struct mtk_aie_dev *fd, u16 pym_width,
			     u16 pym_height);

/*
 * aie_init_table() - fill fd->st_info for a given pyramid size, including
 * the per-loop WDMA output sizes.
 * @fd:         AIE device whose st_info is written.
 * @pym_width:  width of the largest pyramid level.
 * @pym_height: height of the largest pyramid level.
 *
 * The geometry part (image, output and stride sizes per loop) used to be
 * a verbatim copy of aie_update_table(); it is now obtained by calling
 * that function.  The WDMA sizes of loop i depend only on the geometry
 * of loop i, so computing them in a second pass gives the same result.
 *
 * WDMA size rules per loop:
 *  - channel 0, when enabled: result_size for the three RPN loops,
 *    otherwise out_height * out_stride;
 *  - output-layer loops: enabled channels 1..3 copy channel 0;
 *  - RPN loops: channel 0 is result_size even when not enabled;
 *  - other loops: channel 1 uses out_height * out_stride, channels 2 and 3
 *    use the stride-2 height and stride.
 */
static void aie_init_table(struct mtk_aie_dev *fd, u16 pym_width,
			   u16 pym_height)
{
	struct aie_static_info *pstv = &fd->st_info;
	int i;

	aie_update_table(fd, pym_width, pym_height);

	for (i = 0; i < fd_loop_num; i++) {
		const bool is_rpn = (i == rpn2_loop_num ||
				     i == rpn1_loop_num ||
				     i == rpn0_loop_num);

		if (fd_wdma_en[i][0]) {
			if (is_rpn)
				pstv->fd_wdma_size[i][0] = result_size;
			else
				pstv->fd_wdma_size[i][0] =
					pstv->out_height[i] *
					pstv->out_stride[i];
		}

		if (outlayer[i] == 1) {
			if (fd_wdma_en[i][1])
				pstv->fd_wdma_size[i][1] =
					pstv->fd_wdma_size[i][0];
			if (fd_wdma_en[i][2])
				pstv->fd_wdma_size[i][2] =
					pstv->fd_wdma_size[i][0];
			if (fd_wdma_en[i][3])
				pstv->fd_wdma_size[i][3] =
					pstv->fd_wdma_size[i][0];
		} else if (is_rpn) {
			pstv->fd_wdma_size[i][0] = result_size;
		} else {
			if (fd_wdma_en[i][1])
				pstv->fd_wdma_size[i][1] =
					pstv->out_height[i] *
					pstv->out_stride[i];
			if (fd_wdma_en[i][2])
				pstv->fd_wdma_size[i][2] =
					pstv->out_ysize_plus_1_stride2[i] *
					pstv->out_stride_stride2[i];
			if (fd_wdma_en[i][3])
				pstv->fd_wdma_size[i][3] =
					pstv->out_ysize_plus_1_stride2[i] *
					pstv->out_stride_stride2[i];
		}
	}
}
|
|
|
|
/*
 * aie_update_table() - recompute the per-loop geometry in fd->st_info.
 * @fd:         AIE device whose st_info is written.
 * @pym_width:  width of the largest pyramid level.
 * @pym_height: height of the largest pyramid level.
 *
 * The three pyramid start loops are seeded with the full, half and
 * quarter size.  Every other loop takes its input size from the previous
 * loop's output (or stride-2 output when fd_out_stride2_in[] says so).
 * Output sizes, x-sizes, strides and the input x-size are then derived
 * from the static per-loop tables.  WDMA sizes are not touched.
 */
static void aie_update_table(struct mtk_aie_dev *fd, u16 pym_width,
			     u16 pym_height)
{
	struct aie_static_info *pstv = &fd->st_info;
	int i;

	pstv->img_width[pym2_start_loop] = pym_width / 4;
	pstv->img_height[pym2_start_loop] = pym_height / 4;

	pstv->img_width[pym1_start_loop] = pym_width / 2;
	pstv->img_height[pym1_start_loop] = pym_height / 2;

	pstv->img_width[pym0_start_loop] = pym_width;
	pstv->img_height[pym0_start_loop] = pym_height;

	for (i = 0; i < fd_loop_num; i++) {
		const bool seeded = (i == pym2_start_loop ||
				     i == pym1_start_loop ||
				     i == pym0_start_loop);
		unsigned int divisor;

		/* Input size: chained from the previous loop unless seeded. */
		if (!seeded && fd_out_stride2_in[i] == 1) {
			pstv->img_width[i] = pstv->stride2_out_width[i - 1];
			pstv->img_height[i] = pstv->stride2_out_height[i - 1];
		} else if (!seeded) {
			pstv->img_width[i] = pstv->out_width[i - 1];
			pstv->img_height[i] = pstv->out_height[i - 1];
		}

		/* Output size: ceiling division by the down-scaling factor. */
		if (fd_maxpool[i] == 1 && fd_stride[i] == 1)
			divisor = 2 * fd_maxpool[i];
		else
			divisor = fd_stride[i] + 2 * fd_maxpool[i];

		pstv->out_width[i] = (pstv->img_width[i] - 1) / divisor + 1;
		pstv->out_height[i] = (pstv->img_height[i] - 1) / divisor + 1;

		pstv->stride2_out_width[i] =
			((pstv->out_width[i] - 1) / 2 + 1) * out_2size[i];
		pstv->stride2_out_height[i] =
			((pstv->out_height[i] - 1) / 2 + 1) * out_2size[i];

		/* Output x-size and stride; output layers are twice as wide. */
		if (outlayer[i] == 1) {
			pstv->out_xsize_plus_1[i] =
				pstv->out_width[i] * out_ch_pack[i] * 2;
			pstv->out_stride[i] = round_up(
				pstv->out_xsize_plus_1[i] * anchor_en_num[i],
				16);
			pstv->out_xsize_plus_1_stride2[i] =
				((pstv->out_width[i] - 1) / 2 + 1) *
				out_ch_pack[i] * 2 * out_2size[i];
		} else {
			pstv->out_xsize_plus_1[i] =
				pstv->out_width[i] * out_ch_pack[i];
			pstv->out_stride[i] =
				round_up(pstv->out_xsize_plus_1[i], 16);
			pstv->out_xsize_plus_1_stride2[i] =
				((pstv->out_width[i] - 1) / 2 + 1) *
				out_ch_pack[i] * out_2size[i];
		}

		pstv->out_stride_stride2[i] =
			round_up(pstv->out_xsize_plus_1_stride2[i], 16);

		if (out_2size[i] != 1)
			pstv->out_ysize_plus_1_stride2[i] = pstv->out_height[i];
		else
			pstv->out_ysize_plus_1_stride2[i] =
				(pstv->out_height[i] - 1) / 2 + 1;

		/* Input x-size: unpacked input is padded to a multiple of 8. */
		if (in_ch_pack[i] != 1)
			pstv->input_xsize_plus_1[i] =
				pstv->img_width[i] * in_ch_pack[i];
		else
			pstv->input_xsize_plus_1[i] =
				round_up(pstv->img_width[i], 8);
	}
}
|
|
|
|
/*
 * aie_get_data_size() - work out the sizes of all buffers the driver
 * allocates.
 * @fd:             AIE device whose size fields are written.
 * @max_img_width:  stored in fd->base_para->max_img_width.
 * @max_img_height: stored in fd->base_para->max_img_height.
 *
 * Reads st_info.fd_wdma_size[][] and fd->base_para->max_pyramid_*.
 * Writes the configuration sizes, the three pyramid output sizes, and the
 * FD/attribute DMA, result and kernel totals.
 */
static void aie_get_data_size(struct mtk_aie_dev *fd, u16 max_img_width,
			      u16 max_img_height)
{
	struct aie_static_info *pstv = &fd->st_info;
	u8 loop, ch;

	fd->base_para->max_img_width = max_img_width;
	fd->base_para->max_img_height = max_img_height;

	/* Totals accumulated below start from zero. */
	fd->fd_dma_max_size = 0;
	fd->fd_dma_rst_max_size = 0;
	fd->fd_fd_kernel_size = 0;
	fd->fd_attr_kernel_size = 0;
	fd->fd_attr_dma_max_size = 0;
	fd->fd_attr_dma_rst_max_size = 0;

	/* FD mode configuration buffers */
	fd->fd_rs_cfg_size = fd_rs_confi_size;
	fd->fd_fd_cfg_size = fd_fd_confi_size;
	fd->fd_yuv2rgb_cfg_size = fd_yuv2rgb_confi_size;

	/* Attribute mode configuration buffers */
	fd->attr_fd_cfg_size = attr_fd_confi_size;
	fd->attr_yuv2rgb_cfg_size = attr_yuv2rgb_confi_size;

	/* Pyramid outputs: full size, then 1/4 and 1/16 of it */
	fd->rs_pym_out_size[0] = fd->base_para->max_pyramid_width *
				 fd->base_para->max_pyramid_height;
	fd->rs_pym_out_size[1] = fd->rs_pym_out_size[0] / 4;
	fd->rs_pym_out_size[2] = fd->rs_pym_out_size[0] / 16;

	/* FD mode intermediate outputs: loops rpn1+1 up to rpn0-2 */
	for (loop = rpn1_loop_num + 1; loop < rpn0_loop_num - 1; loop++)
		for (ch = 0; ch < output_WDMA_WRA_num; ch++)
			fd->fd_dma_max_size += pstv->fd_wdma_size[loop][ch];

	/* FD mode results: channel 0 of the three RPN loops */
	fd->fd_dma_rst_max_size = pstv->fd_wdma_size[rpn2_loop_num][0] +
				  pstv->fd_wdma_size[rpn1_loop_num][0] +
				  pstv->fd_wdma_size[rpn0_loop_num][0];

	/* FD mode kernels (zero entries add nothing) */
	for (loop = 0; loop < fd_loop_num; loop++)
		for (ch = 0; ch < kernel_RDMA_RA_num; ch++)
			fd->fd_fd_kernel_size += fd_ker_rdma_size[loop][ch];

	/* Attribute mode outputs, split into result and intermediate space */
	for (loop = 0; loop < attr_loop_num; loop++) {
		const bool result_loop = (loop == age_out_rgs ||
					  loop == gender_out_rgs ||
					  loop == indian_out_rgs ||
					  loop == race_out_rgs);

		for (ch = 0; ch < output_WDMA_WRA_num; ch++) {
			if (!attr_wdma_en[loop][ch])
				continue;

			if (result_loop && ch == 0)
				fd->fd_attr_dma_rst_max_size +=
					ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
			else
				fd->fd_attr_dma_max_size +=
					attr_wdma_size[loop][ch];
		}
	}

	/* Attribute mode kernels */
	for (loop = 0; loop < attr_loop_num; loop++)
		for (ch = 0; ch < kernel_RDMA_RA_num; ch++)
			fd->fd_attr_kernel_size += attr_ker_rdma_size[loop][ch];

	/*
	 * FD Pose secure result output buffer (result size * 3 loops) is
	 * intentionally not added to fd_dma_rst_max_size here.
	 */
}
|
|
|
|
static int aie_alloc_dram_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
u8 i;
|
|
u32 alloc_size;
|
|
unsigned long long addr = 0;
|
|
unsigned int msb_bit = 0;
|
|
struct dma_buf *ret_buf = NULL;
|
|
unsigned long long iova = 0;
|
|
void *va = NULL;
|
|
|
|
/* RS DRAM */
|
|
alloc_size = fd->fd_rs_cfg_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->rs_cfg_data);
|
|
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->rs_cfg_data.dmabuf = ret_buf;
|
|
fd->rs_cfg_data.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->rs_cfg_data);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->rs_cfg_data.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->rs_cfg_data.va = va;
|
|
|
|
addr = fd->rs_cfg_data.pa;
|
|
msb_bit = (addr & 0Xf00000000) >> 32; //MASK MSB-BIT
|
|
|
|
writel(msb_bit, fd->fd_base + FDVT_RS_CON_BASE_ADR_MSB);
|
|
|
|
/* FD MODE */
|
|
fd->base_para->fd_rs_cfg_pa = fd->rs_cfg_data.pa;
|
|
fd->base_para->fd_rs_cfg_va = fd->rs_cfg_data.va;
|
|
|
|
/* FD DRAM */
|
|
alloc_size =
|
|
fd->fd_fd_cfg_size + fd->attr_fd_cfg_size * MAX_ENQUE_FRAME_NUM;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fd_cfg_data);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fd_cfg_data.dmabuf = ret_buf;
|
|
fd->fd_cfg_data.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fd_cfg_data);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fd_cfg_data.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fd_cfg_data.va = va;
|
|
|
|
addr = fd->fd_cfg_data.pa;
|
|
msb_bit = (addr & 0Xf00000000) >> 32; //MASK MSB-BIT
|
|
|
|
writel(msb_bit, fd->fd_base + FDVT_FD_CON_BASE_ADR_MSB);
|
|
|
|
/* FD MODE */
|
|
fd->base_para->fd_fd_cfg_pa = fd->fd_cfg_data.pa;
|
|
fd->base_para->fd_fd_cfg_va = fd->fd_cfg_data.va;
|
|
/* ATTR MODE */
|
|
fd->base_para->attr_fd_cfg_pa[0] =
|
|
fd->base_para->fd_fd_cfg_pa + fd->fd_fd_cfg_size;
|
|
fd->base_para->attr_fd_cfg_va[0] =
|
|
fd->base_para->fd_fd_cfg_va + fd->fd_fd_cfg_size;
|
|
|
|
for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
|
|
fd->base_para->attr_fd_cfg_pa[i] =
|
|
fd->base_para->attr_fd_cfg_pa[i - 1] +
|
|
fd->attr_fd_cfg_size;
|
|
fd->base_para->attr_fd_cfg_va[i] =
|
|
fd->base_para->attr_fd_cfg_va[i - 1] +
|
|
fd->attr_fd_cfg_size;
|
|
}
|
|
|
|
/* YUV2RGB DRAM */
|
|
alloc_size = fd->fd_yuv2rgb_cfg_size +
|
|
fd->attr_yuv2rgb_cfg_size * MAX_ENQUE_FRAME_NUM;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->yuv2rgb_cfg_data);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->yuv2rgb_cfg_data.dmabuf = ret_buf;
|
|
fd->yuv2rgb_cfg_data.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->yuv2rgb_cfg_data);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->yuv2rgb_cfg_data.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->yuv2rgb_cfg_data.va = va;
|
|
|
|
addr = fd->yuv2rgb_cfg_data.pa;
|
|
msb_bit = (addr & 0Xf00000000) >> 32; //MASK MSB-BIT
|
|
|
|
writel(msb_bit, fd->fd_base + FDVT_YUV2RGB_CON_BASE_ADR_MSB);
|
|
|
|
|
|
/* FD MODE */
|
|
fd->base_para->fd_yuv2rgb_cfg_pa = fd->yuv2rgb_cfg_data.pa;
|
|
fd->base_para->fd_yuv2rgb_cfg_va = fd->yuv2rgb_cfg_data.va;
|
|
|
|
/* ATTR MODE */
|
|
fd->base_para->attr_yuv2rgb_cfg_pa[0] =
|
|
fd->base_para->fd_yuv2rgb_cfg_pa + fd->fd_yuv2rgb_cfg_size;
|
|
fd->base_para->attr_yuv2rgb_cfg_va[0] =
|
|
fd->base_para->fd_yuv2rgb_cfg_va + fd->fd_yuv2rgb_cfg_size;
|
|
|
|
for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
|
|
fd->base_para->attr_yuv2rgb_cfg_pa[i] =
|
|
fd->base_para->attr_yuv2rgb_cfg_pa[i - 1] +
|
|
fd->attr_yuv2rgb_cfg_size;
|
|
fd->base_para->attr_yuv2rgb_cfg_va[i] =
|
|
fd->base_para->attr_yuv2rgb_cfg_va[i - 1] +
|
|
fd->attr_yuv2rgb_cfg_size;
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
static int aie_alloc_output_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
int ret = 0;
|
|
u32 alloc_size = 0;
|
|
int i, j, pa_off = 0, va_off = 0;
|
|
struct dma_buf *ret_buf = NULL;
|
|
unsigned long long iova = 0;
|
|
void *va = NULL;
|
|
|
|
for (i = 0; i < PYM_NUM; i++)
|
|
alloc_size += fd->rs_pym_out_size[i] * 3;
|
|
|
|
if (g_user_param.is_secure) {
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, true);
|
|
if (!ret_buf)
|
|
return -1;
|
|
fd->rs_output_hw.size = alloc_size;
|
|
fd->rs_output_hw.dmabuf = ret_buf;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->rs_output_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->rs_output_hw.pa = iova;
|
|
} else {
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->rs_output_hw.size = alloc_size;
|
|
fd->rs_output_hw.dmabuf = ret_buf;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->rs_output_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->rs_output_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->rs_output_hw.va = va;
|
|
}
|
|
|
|
for (i = 0; i < PYM_NUM; i++) {
|
|
for (j = 0; j < COLOR_NUM; j++) {
|
|
fd->base_para->rs_pym_rst_pa[i][j] =
|
|
fd->rs_output_hw.pa + pa_off;
|
|
pa_off += fd->rs_pym_out_size[i];
|
|
|
|
fd->base_para->rs_pym_rst_va[i][j] =
|
|
fd->rs_output_hw.va + va_off;
|
|
va_off += fd->rs_pym_out_size[i];
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void aie_alloc_normal(struct mtk_aie_dev *fd, unsigned int start,
|
|
unsigned int end)
|
|
{
|
|
unsigned int i, j;
|
|
unsigned int pi, pj;
|
|
struct aie_static_info *pstv;
|
|
|
|
pstv = &fd->st_info;
|
|
if (start == 0 || end <= start)
|
|
return;
|
|
|
|
pi = start - 1;
|
|
pj = 0;
|
|
for (i = start; i < end + 1; i++) {
|
|
for (j = 0; j < output_WDMA_WRA_num; j++) {
|
|
if (fd_wdma_en[i][j]) {
|
|
fd->dma_para->fd_out_hw_pa[i][j] =
|
|
fd->dma_para->fd_out_hw_pa[pi][pj] +
|
|
pstv->fd_wdma_size[pi][pj];
|
|
pi = i;
|
|
pj = j;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static int aie_alloc_fddma_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
u32 alloc_size;
|
|
struct dma_buf *ret_buf = NULL;
|
|
unsigned long long iova = 0;
|
|
void *va = NULL;
|
|
|
|
|
|
alloc_size = fd->fd_dma_max_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fd_dma_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fd_dma_hw.dmabuf = ret_buf;
|
|
fd->fd_dma_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fd_dma_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fd_dma_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fd_dma_hw.va = va;
|
|
|
|
|
|
alloc_size = fd->fd_fd_kernel_size + fd->fd_attr_kernel_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fd_kernel_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fd_kernel_hw.dmabuf = ret_buf;
|
|
fd->fd_kernel_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fd_kernel_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fd_kernel_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fd_kernel_hw.va = va;
|
|
|
|
|
|
alloc_size = fd->fd_attr_dma_max_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fd_attr_dma_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fd_attr_dma_hw.dmabuf = ret_buf;
|
|
fd->fd_attr_dma_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fd_attr_dma_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fd_attr_dma_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fd_attr_dma_hw.va = va;
|
|
|
|
alloc_size = fd->fd_dma_rst_max_size + fd->fd_attr_dma_rst_max_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fd_dma_result_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fd_dma_result_hw.dmabuf = ret_buf;
|
|
fd->fd_dma_result_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fd_dma_result_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fd_dma_result_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fd_dma_result_hw.va = va;
|
|
|
|
return 0;
|
|
}
|
|
#ifdef FLD
|
|
static int aie_alloc_fld_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
|
|
u32 alloc_size;
|
|
unsigned long long addr = 0;
|
|
unsigned int msb_bit = 0;
|
|
struct dma_buf *ret_buf = NULL;
|
|
unsigned long long iova = 0;
|
|
void *va = NULL;
|
|
|
|
alloc_size = fld_blink_weight_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_blink_weight_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_blink_weight_hw.dmabuf = ret_buf;
|
|
fd->fld_blink_weight_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_blink_weight_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_blink_weight_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_blink_weight_hw.va = va;
|
|
|
|
addr = fd->fld_blink_weight_hw.pa;
|
|
msb_bit = (addr & 0Xf00000000) >> 8; //MASK MSB-BIT
|
|
writel(msb_bit, fd->fd_base + FLD_BS_IN_BASE_ADDR_8_15_MSB);
|
|
|
|
|
|
alloc_size = fld_fp_size * FLD_MAX_INPUT;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_fp_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_fp_hw.dmabuf = ret_buf;
|
|
fd->fld_fp_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_fp_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_fp_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_fp_hw.va = va;
|
|
|
|
alloc_size = (fld_cv_size * (FLD_MAX_INPUT-1)) + fld_cv_size_00;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_cv_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_cv_hw.dmabuf = ret_buf;
|
|
fd->fld_cv_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_cv_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_cv_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_cv_hw.va = va;
|
|
|
|
|
|
alloc_size = fld_leafnode_size * FLD_MAX_INPUT;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_leafnode_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_leafnode_hw.dmabuf = ret_buf;
|
|
fd->fld_leafnode_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_leafnode_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_leafnode_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_leafnode_hw.va = va;
|
|
|
|
|
|
alloc_size = fld_tree_size * FLD_MAX_INPUT;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_tree_02_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_tree_02_hw.dmabuf = ret_buf;
|
|
fd->fld_tree_02_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_tree_02_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_tree_02_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_tree_02_hw.va = va;
|
|
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_tree_13_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_tree_13_hw.dmabuf = ret_buf;
|
|
fd->fld_tree_13_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_tree_13_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_tree_13_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_tree_13_hw.va = va;
|
|
|
|
alloc_size = fld_result_size;
|
|
//ret = aie_imem_alloc(fd, alloc_size, &fd->fld_output_hw);
|
|
ret_buf = aie_imem_sec_alloc(fd, alloc_size, false);
|
|
if (!ret_buf)
|
|
return -1;
|
|
|
|
fd->fld_output_hw.dmabuf = ret_buf;
|
|
fd->fld_output_hw.size = alloc_size;
|
|
iova = aie_get_sec_iova(fd, ret_buf, &fd->fld_output_hw);
|
|
if (!iova)
|
|
return -1;
|
|
|
|
fd->fld_output_hw.pa = iova;
|
|
va = aie_get_va(fd, ret_buf);
|
|
if (!va)
|
|
return -1;
|
|
|
|
fd->fld_output_hw.va = va;
|
|
|
|
addr = fd->fld_output_hw.pa;
|
|
msb_bit = (addr & 0Xf00000000) >> 32;
|
|
writel(msb_bit, fd->fd_base + FLD_TR_OUT_BASE_ADDR_0_MSB);
|
|
writel(msb_bit, fd->fd_base + FLD_PP_OUT_BASE_ADDR_0_MSB);
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
static void aie_arrange_fddma_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
dma_addr_t currentPA;
|
|
struct aie_static_info *pstv;
|
|
|
|
pstv = &fd->st_info;
|
|
|
|
/* 0~18 */
|
|
fd->dma_para->fd_out_hw_pa[0][0] = fd->fd_dma_hw.pa;
|
|
aie_alloc_normal(fd, 1, 18);
|
|
|
|
/* 19~27 */
|
|
fd->dma_para->fd_out_hw_pa[19][0] =
|
|
fd->dma_para->fd_out_hw_pa[18][1] + pstv->fd_wdma_size[18][1];
|
|
fd->dma_para->fd_out_hw_pa[19][1] =
|
|
fd->dma_para->fd_out_hw_pa[19][0] + pstv->out_xsize_plus_1[19];
|
|
fd->dma_para->fd_out_hw_pa[20][0] = fd->dma_para->fd_out_hw_pa[19][0] +
|
|
2 * pstv->out_xsize_plus_1[20];
|
|
fd->dma_para->fd_out_hw_pa[20][1] = fd->dma_para->fd_out_hw_pa[19][0] +
|
|
3 * pstv->out_xsize_plus_1[20];
|
|
fd->dma_para->fd_out_hw_pa[21][0] = fd->dma_para->fd_out_hw_pa[19][0] +
|
|
4 * pstv->out_xsize_plus_1[21];
|
|
fd->dma_para->fd_out_hw_pa[22][0] =
|
|
fd->dma_para->fd_out_hw_pa[19][0] + pstv->fd_wdma_size[19][0] +
|
|
pstv->fd_wdma_size[19][1] + pstv->fd_wdma_size[20][0] +
|
|
pstv->fd_wdma_size[20][1] + pstv->fd_wdma_size[21][0];
|
|
fd->dma_para->fd_out_hw_pa[22][1] =
|
|
fd->dma_para->fd_out_hw_pa[22][0] + pstv->fd_wdma_size[22][0] +
|
|
pstv->fd_wdma_size[22][2] + pstv->fd_wdma_size[23][0] +
|
|
pstv->fd_wdma_size[23][2] + pstv->fd_wdma_size[24][0];
|
|
fd->dma_para->fd_out_hw_pa[22][2] =
|
|
fd->dma_para->fd_out_hw_pa[22][0] + pstv->out_xsize_plus_1[22];
|
|
fd->dma_para->fd_out_hw_pa[22][3] =
|
|
fd->dma_para->fd_out_hw_pa[22][1] + pstv->out_xsize_plus_1[22];
|
|
fd->dma_para->fd_out_hw_pa[23][0] = fd->dma_para->fd_out_hw_pa[22][0] +
|
|
2 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[23][1] = fd->dma_para->fd_out_hw_pa[22][1] +
|
|
2 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[23][2] = fd->dma_para->fd_out_hw_pa[22][0] +
|
|
3 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[23][3] = fd->dma_para->fd_out_hw_pa[22][1] +
|
|
3 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[24][0] = fd->dma_para->fd_out_hw_pa[22][0] +
|
|
4 * pstv->out_xsize_plus_1[24];
|
|
fd->dma_para->fd_out_hw_pa[24][1] = fd->dma_para->fd_out_hw_pa[22][1] +
|
|
4 * pstv->out_xsize_plus_1[24];
|
|
fd->dma_para->fd_out_hw_pa[25][0] =
|
|
fd->dma_para->fd_out_hw_pa[22][1] + pstv->fd_wdma_size[22][1] +
|
|
pstv->fd_wdma_size[22][3] + pstv->fd_wdma_size[23][1] +
|
|
pstv->fd_wdma_size[23][3] + pstv->fd_wdma_size[24][1];
|
|
fd->dma_para->fd_out_hw_pa[25][1] =
|
|
fd->dma_para->fd_out_hw_pa[25][0] + pstv->out_xsize_plus_1[25];
|
|
fd->dma_para->fd_out_hw_pa[26][0] = fd->dma_para->fd_out_hw_pa[25][0] +
|
|
2 * pstv->out_xsize_plus_1[26];
|
|
fd->dma_para->fd_out_hw_pa[26][1] = fd->dma_para->fd_out_hw_pa[25][0] +
|
|
3 * pstv->out_xsize_plus_1[26];
|
|
fd->dma_para->fd_out_hw_pa[27][0] = fd->dma_para->fd_out_hw_pa[25][0] +
|
|
4 * pstv->out_xsize_plus_1[27];
|
|
|
|
fd->dma_para->fd_out_hw_pa[29][0] = fd->fd_dma_hw.pa;
|
|
#if CHECK_SERVICE_0
|
|
fd->dma_para->fd_out_hw_pa[29][0] =
|
|
fd->dma_para->fd_out_hw_pa[25][0] + pstv->fd_wdma_size[25][0] +
|
|
pstv->fd_wdma_size[25][1] + pstv->fd_wdma_size[26][0] +
|
|
pstv->fd_wdma_size[26][1] + pstv->fd_wdma_size[27][0];
|
|
#endif
|
|
aie_alloc_normal(fd, 30, 47);
|
|
/* 48~56 */
|
|
fd->dma_para->fd_out_hw_pa[48][0] =
|
|
fd->dma_para->fd_out_hw_pa[47][1] + pstv->fd_wdma_size[47][1];
|
|
fd->dma_para->fd_out_hw_pa[48][1] =
|
|
fd->dma_para->fd_out_hw_pa[48][0] + pstv->out_xsize_plus_1[48];
|
|
fd->dma_para->fd_out_hw_pa[49][0] = fd->dma_para->fd_out_hw_pa[48][0] +
|
|
2 * pstv->out_xsize_plus_1[49];
|
|
fd->dma_para->fd_out_hw_pa[49][1] = fd->dma_para->fd_out_hw_pa[48][0] +
|
|
3 * pstv->out_xsize_plus_1[49];
|
|
fd->dma_para->fd_out_hw_pa[50][0] = fd->dma_para->fd_out_hw_pa[48][0] +
|
|
4 * pstv->out_xsize_plus_1[50];
|
|
fd->dma_para->fd_out_hw_pa[51][0] =
|
|
fd->dma_para->fd_out_hw_pa[48][0] + pstv->fd_wdma_size[48][0] +
|
|
pstv->fd_wdma_size[48][1] + pstv->fd_wdma_size[49][0] +
|
|
pstv->fd_wdma_size[49][1] + pstv->fd_wdma_size[50][0];
|
|
fd->dma_para->fd_out_hw_pa[51][1] =
|
|
fd->dma_para->fd_out_hw_pa[51][0] + pstv->fd_wdma_size[51][0] +
|
|
pstv->fd_wdma_size[51][2] + pstv->fd_wdma_size[52][0] +
|
|
pstv->fd_wdma_size[52][2] + pstv->fd_wdma_size[53][0];
|
|
fd->dma_para->fd_out_hw_pa[51][2] =
|
|
fd->dma_para->fd_out_hw_pa[51][0] + pstv->out_xsize_plus_1[51];
|
|
fd->dma_para->fd_out_hw_pa[51][3] =
|
|
fd->dma_para->fd_out_hw_pa[51][1] + pstv->out_xsize_plus_1[51];
|
|
fd->dma_para->fd_out_hw_pa[52][0] = fd->dma_para->fd_out_hw_pa[51][0] +
|
|
2 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[52][1] = fd->dma_para->fd_out_hw_pa[51][1] +
|
|
2 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[52][2] = fd->dma_para->fd_out_hw_pa[51][0] +
|
|
3 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[52][3] = fd->dma_para->fd_out_hw_pa[51][1] +
|
|
3 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[53][0] = fd->dma_para->fd_out_hw_pa[51][0] +
|
|
4 * pstv->out_xsize_plus_1[53];
|
|
fd->dma_para->fd_out_hw_pa[53][1] = fd->dma_para->fd_out_hw_pa[51][1] +
|
|
4 * pstv->out_xsize_plus_1[53];
|
|
fd->dma_para->fd_out_hw_pa[54][0] =
|
|
fd->dma_para->fd_out_hw_pa[51][1] + pstv->fd_wdma_size[51][1] +
|
|
pstv->fd_wdma_size[51][3] + pstv->fd_wdma_size[52][1] +
|
|
pstv->fd_wdma_size[52][3] + pstv->fd_wdma_size[53][1];
|
|
fd->dma_para->fd_out_hw_pa[54][1] =
|
|
fd->dma_para->fd_out_hw_pa[54][0] + pstv->out_xsize_plus_1[54];
|
|
fd->dma_para->fd_out_hw_pa[55][0] = fd->dma_para->fd_out_hw_pa[54][0] +
|
|
2 * pstv->out_xsize_plus_1[55];
|
|
fd->dma_para->fd_out_hw_pa[55][1] = fd->dma_para->fd_out_hw_pa[54][0] +
|
|
3 * pstv->out_xsize_plus_1[55];
|
|
fd->dma_para->fd_out_hw_pa[56][0] = fd->dma_para->fd_out_hw_pa[54][0] +
|
|
4 * pstv->out_xsize_plus_1[56];
|
|
|
|
/* 58~76 */
|
|
fd->dma_para->fd_out_hw_pa[58][0] = fd->fd_dma_hw.pa;
|
|
#if CHECK_SERVICE_0
|
|
fd->dma_para->fd_out_hw_pa[58][0] =
|
|
fd->dma_para->fd_out_hw_pa[54][0] + pstv->fd_wdma_size[54][0] +
|
|
pstv->fd_wdma_size[54][1] + pstv->fd_wdma_size[55][0] +
|
|
pstv->fd_wdma_size[55][1] + pstv->fd_wdma_size[56][0];
|
|
#endif
|
|
aie_alloc_normal(fd, 59, 76);
|
|
|
|
/* 77~85 */
|
|
fd->dma_para->fd_out_hw_pa[77][0] =
|
|
fd->dma_para->fd_out_hw_pa[76][1] + pstv->fd_wdma_size[76][1];
|
|
fd->dma_para->fd_out_hw_pa[77][1] =
|
|
fd->dma_para->fd_out_hw_pa[77][0] + pstv->out_xsize_plus_1[77];
|
|
fd->dma_para->fd_out_hw_pa[78][0] = fd->dma_para->fd_out_hw_pa[77][0] +
|
|
2 * pstv->out_xsize_plus_1[78];
|
|
fd->dma_para->fd_out_hw_pa[78][1] = fd->dma_para->fd_out_hw_pa[77][0] +
|
|
3 * pstv->out_xsize_plus_1[78];
|
|
fd->dma_para->fd_out_hw_pa[79][0] = fd->dma_para->fd_out_hw_pa[77][0] +
|
|
4 * pstv->out_xsize_plus_1[79];
|
|
fd->dma_para->fd_out_hw_pa[80][0] =
|
|
fd->dma_para->fd_out_hw_pa[77][0] + pstv->fd_wdma_size[77][0] +
|
|
pstv->fd_wdma_size[77][1] + pstv->fd_wdma_size[78][0] +
|
|
pstv->fd_wdma_size[78][1] + pstv->fd_wdma_size[79][0];
|
|
fd->dma_para->fd_out_hw_pa[80][1] =
|
|
fd->dma_para->fd_out_hw_pa[80][0] + pstv->fd_wdma_size[80][0] +
|
|
pstv->fd_wdma_size[80][2] + pstv->fd_wdma_size[81][0] +
|
|
pstv->fd_wdma_size[81][2] + pstv->fd_wdma_size[82][0];
|
|
fd->dma_para->fd_out_hw_pa[80][2] =
|
|
fd->dma_para->fd_out_hw_pa[80][0] + pstv->out_xsize_plus_1[80];
|
|
fd->dma_para->fd_out_hw_pa[80][3] =
|
|
fd->dma_para->fd_out_hw_pa[80][1] + pstv->out_xsize_plus_1[80];
|
|
fd->dma_para->fd_out_hw_pa[81][0] = fd->dma_para->fd_out_hw_pa[80][0] +
|
|
2 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[81][1] = fd->dma_para->fd_out_hw_pa[80][1] +
|
|
2 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[81][2] = fd->dma_para->fd_out_hw_pa[80][0] +
|
|
3 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[81][3] = fd->dma_para->fd_out_hw_pa[80][1] +
|
|
3 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[82][0] = fd->dma_para->fd_out_hw_pa[80][0] +
|
|
4 * pstv->out_xsize_plus_1[82];
|
|
fd->dma_para->fd_out_hw_pa[82][1] = fd->dma_para->fd_out_hw_pa[80][1] +
|
|
4 * pstv->out_xsize_plus_1[82];
|
|
fd->dma_para->fd_out_hw_pa[83][0] =
|
|
fd->dma_para->fd_out_hw_pa[80][1] + pstv->fd_wdma_size[80][1] +
|
|
pstv->fd_wdma_size[80][3] + pstv->fd_wdma_size[81][1] +
|
|
pstv->fd_wdma_size[81][3] + pstv->fd_wdma_size[82][1];
|
|
fd->dma_para->fd_out_hw_pa[83][1] =
|
|
fd->dma_para->fd_out_hw_pa[83][0] + pstv->out_xsize_plus_1[83];
|
|
fd->dma_para->fd_out_hw_pa[84][0] = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
2 * pstv->out_xsize_plus_1[84];
|
|
fd->dma_para->fd_out_hw_pa[84][1] = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
3 * pstv->out_xsize_plus_1[84];
|
|
fd->dma_para->fd_out_hw_pa[85][0] = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
4 * pstv->out_xsize_plus_1[85];
|
|
#if CHECK_SERVICE_0
|
|
/* VA : except 28, 57, 86 */
|
|
/* 0~86 */
|
|
fd->dma_para->fd_out_hw_va[0][0] = fd->fd_dma_hw.va;
|
|
for (i = 1; i < fd_loop_num; i++) {
|
|
if (i == rpn2_loop_num || i == rpn1_loop_num ||
|
|
i == rpn0_loop_num)
|
|
continue;
|
|
for (j = 0; j < 4; j++) {
|
|
if (fd_wdma_en[i][j]) {
|
|
fd->dma_para->fd_out_hw_va[i][j] =
|
|
fd->fd_dma_hw.va +
|
|
fd->dma_para->fd_out_hw_pa[i][j] -
|
|
fd->fd_dma_hw.pa;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
currentPA = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
pstv->fd_wdma_size[83][0] + pstv->fd_wdma_size[83][1] +
|
|
pstv->fd_wdma_size[84][0] + pstv->fd_wdma_size[84][1] +
|
|
pstv->fd_wdma_size[85][0];
|
|
|
|
#if CHECK_SERVICE_0
|
|
currentVA = fd->dma_para->fd_out_hw_va[83][0] +
|
|
pstv->fd_wdma_size[83][0] + pstv->fd_wdma_size[83][1] +
|
|
pstv->fd_wdma_size[84][0] + pstv->fd_wdma_size[84][1] +
|
|
pstv->fd_wdma_size[85][0];
|
|
#endif
|
|
}
|
|
|
|
static void aie_arrange_kernel_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
void *currentVA = NULL;
|
|
dma_addr_t currentPA;
|
|
u8 i, j;
|
|
|
|
currentPA = fd->fd_kernel_hw.pa;
|
|
currentVA = fd->fd_kernel_hw.va;
|
|
|
|
for (i = 0; i < fd_loop_num; i++) {
|
|
for (j = 0; j < kernel_RDMA_RA_num; j++) {
|
|
if (fd_ker_rdma_size[i][j]) {
|
|
fd->dma_para->fd_kernel_pa[i][j] = currentPA;
|
|
fd->dma_para->fd_kernel_va[i][j] = currentVA;
|
|
currentPA += fd_ker_rdma_size[i][j];
|
|
currentVA += fd_ker_rdma_size[i][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < attr_loop_num; i++) {
|
|
for (j = 0; j < kernel_RDMA_RA_num; j++) {
|
|
fd->dma_para->attr_kernel_pa[i][j] = currentPA;
|
|
fd->dma_para->attr_kernel_va[i][j] = currentVA;
|
|
currentPA += attr_ker_rdma_size[i][j];
|
|
currentVA += attr_ker_rdma_size[i][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
static void aie_arrange_attrdma_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
void *currentVA = NULL;
|
|
dma_addr_t currentPA;
|
|
u8 i, j;
|
|
|
|
currentPA = fd->fd_attr_dma_hw.pa;
|
|
currentVA = fd->fd_attr_dma_hw.va;
|
|
|
|
/* attribute mode */
|
|
for (i = 0; i < attr_loop_num; i++) {
|
|
for (j = 0; j < output_WDMA_WRA_num; j++) {
|
|
if (attr_wdma_en[i][j]) {
|
|
fd->dma_para->attr_out_hw_pa[i][j] = currentPA;
|
|
fd->dma_para->attr_out_hw_va[i][j] = currentVA;
|
|
currentPA += attr_wdma_size[i][j];
|
|
currentVA += attr_wdma_size[i][j];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void aie_arrange_result_dma_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
void *currentResultVA = NULL;
|
|
dma_addr_t currentResultPA;
|
|
u8 i;
|
|
struct aie_static_info *pstv;
|
|
|
|
pstv = &fd->st_info;
|
|
|
|
currentResultPA = fd->fd_dma_result_hw.pa;
|
|
currentResultVA = fd->fd_dma_result_hw.va;
|
|
|
|
fd->dma_para->fd_out_hw_pa[rpn2_loop_num][0] = currentResultPA;
|
|
fd->dma_para->fd_out_hw_va[rpn2_loop_num][0] = currentResultVA;
|
|
currentResultPA += pstv->fd_wdma_size[rpn2_loop_num][0];
|
|
currentResultVA += pstv->fd_wdma_size[rpn2_loop_num][0];
|
|
fd->dma_para->fd_out_hw_pa[rpn1_loop_num][0] = currentResultPA;
|
|
fd->dma_para->fd_out_hw_va[rpn1_loop_num][0] = currentResultVA;
|
|
currentResultPA += pstv->fd_wdma_size[rpn1_loop_num][0];
|
|
currentResultVA += pstv->fd_wdma_size[rpn1_loop_num][0];
|
|
fd->dma_para->fd_out_hw_pa[rpn0_loop_num][0] = currentResultPA;
|
|
fd->dma_para->fd_out_hw_va[rpn0_loop_num][0] = currentResultVA;
|
|
currentResultPA += pstv->fd_wdma_size[rpn0_loop_num][0];
|
|
currentResultVA += pstv->fd_wdma_size[rpn0_loop_num][0];
|
|
|
|
fd->dma_para->attr_out_hw_pa[age_out_rgs][0] = currentResultPA;
|
|
fd->dma_para->attr_out_hw_va[age_out_rgs][0] = currentResultVA;
|
|
currentResultPA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
currentResultVA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
fd->dma_para->attr_out_hw_pa[gender_out_rgs][0] = currentResultPA;
|
|
fd->dma_para->attr_out_hw_va[gender_out_rgs][0] = currentResultVA;
|
|
currentResultPA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
currentResultVA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
fd->dma_para->attr_out_hw_pa[indian_out_rgs][0] = currentResultPA;
|
|
fd->dma_para->attr_out_hw_va[indian_out_rgs][0] = currentResultVA;
|
|
currentResultPA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
currentResultVA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
fd->dma_para->attr_out_hw_pa[race_out_rgs][0] = currentResultPA;
|
|
fd->dma_para->attr_out_hw_va[race_out_rgs][0] = currentResultVA;
|
|
currentResultPA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
currentResultVA += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
|
|
|
|
/* need to prepare 10 buffers to store 10 times result */
|
|
fd->dma_para->age_out_hw_pa[0] =
|
|
fd->dma_para->attr_out_hw_pa[age_out_rgs][0];
|
|
fd->dma_para->age_out_hw_va[0] =
|
|
fd->dma_para->attr_out_hw_va[age_out_rgs][0];
|
|
fd->dma_para->gender_out_hw_pa[0] =
|
|
fd->dma_para->attr_out_hw_pa[gender_out_rgs][0];
|
|
fd->dma_para->gender_out_hw_va[0] =
|
|
fd->dma_para->attr_out_hw_va[gender_out_rgs][0];
|
|
fd->dma_para->isIndian_out_hw_pa[0] =
|
|
fd->dma_para->attr_out_hw_pa[indian_out_rgs][0];
|
|
fd->dma_para->isIndian_out_hw_va[0] =
|
|
fd->dma_para->attr_out_hw_va[indian_out_rgs][0];
|
|
fd->dma_para->race_out_hw_pa[0] =
|
|
fd->dma_para->attr_out_hw_pa[race_out_rgs][0];
|
|
fd->dma_para->race_out_hw_va[0] =
|
|
fd->dma_para->attr_out_hw_va[race_out_rgs][0];
|
|
|
|
for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
|
|
fd->dma_para->age_out_hw_pa[i] =
|
|
fd->dma_para->age_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->age_out_hw_va[i] =
|
|
fd->dma_para->age_out_hw_va[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->gender_out_hw_pa[i] =
|
|
fd->dma_para->gender_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->gender_out_hw_va[i] =
|
|
fd->dma_para->gender_out_hw_va[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->isIndian_out_hw_pa[i] =
|
|
fd->dma_para->isIndian_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->isIndian_out_hw_va[i] =
|
|
fd->dma_para->isIndian_out_hw_va[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->race_out_hw_pa[i] =
|
|
fd->dma_para->race_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
|
|
fd->dma_para->race_out_hw_va[i] =
|
|
fd->dma_para->race_out_hw_va[i - 1] + ATTR_OUT_SIZE;
|
|
}
|
|
|
|
memset(fd->fd_dma_result_hw.va, 0, fd->fd_dma_result_hw.size);
|
|
|
|
}
|
|
#ifdef FLD
|
|
static void aie_arrange_fld_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
int input_index = 0;
|
|
int msb_bit_0 = 0, msb_bit_1 = 0, msb_bit_2 = 0, msb_bit_3 = 0;
|
|
int msb_bit_4 = 0, msb_bit_5 = 0, msb_bit_6 = 0, msb_bit_7 = 0;
|
|
int set_msb_bit = 0;
|
|
|
|
fd->dma_para->fld_blink_weight_va = fd->fld_blink_weight_hw.va;
|
|
fd->dma_para->fld_blink_weight_pa = fd->fld_blink_weight_hw.pa;
|
|
|
|
fd->dma_para->fld_output_va = fd->fld_output_hw.va;
|
|
fd->dma_para->fld_output_pa = fd->fld_output_hw.pa;
|
|
fd->fld_para->fld_output_va = fd->dma_para->fld_output_va;
|
|
fd->fld_para->fld_output_pa = fd->dma_para->fld_output_pa;
|
|
|
|
fd->dma_para->fld_cv_va[0] = fd->fld_cv_hw.va;
|
|
fd->dma_para->fld_cv_pa[0] = fd->fld_cv_hw.pa;
|
|
fd->dma_para->fld_fp_va[0] = fd->fld_fp_hw.va;
|
|
fd->dma_para->fld_fp_pa[0] = fd->fld_fp_hw.pa;
|
|
|
|
fd->dma_para->fld_leafnode_va[0] = fd->fld_leafnode_hw.va;
|
|
fd->dma_para->fld_leafnode_pa[0] = fd->fld_leafnode_hw.pa;
|
|
fd->dma_para->fld_tree02_va[0] = fd->fld_tree_02_hw.va;
|
|
fd->dma_para->fld_tree02_pa[0] = fd->fld_tree_02_hw.pa;
|
|
fd->dma_para->fld_tree13_va[0] = fd->fld_tree_13_hw.va;
|
|
fd->dma_para->fld_tree13_pa[0] = fd->fld_tree_13_hw.pa;
|
|
|
|
fd->dma_para->fld_cv_va[1] = fd->dma_para->fld_cv_va[0] + fld_cv_size_00;
|
|
fd->dma_para->fld_cv_pa[1] = fd->dma_para->fld_cv_pa[0] + fld_cv_size_00;
|
|
fd->dma_para->fld_fp_va[1] = fd->dma_para->fld_fp_va[0] + fld_fp_size;
|
|
fd->dma_para->fld_fp_pa[1] = fd->dma_para->fld_fp_pa[0] + fld_fp_size;
|
|
fd->dma_para->fld_leafnode_va[1] = fd->dma_para->fld_leafnode_va[0] + fld_leafnode_size;
|
|
fd->dma_para->fld_leafnode_pa[1] = fd->dma_para->fld_leafnode_pa[0] + fld_leafnode_size;
|
|
fd->dma_para->fld_tree02_va[1] = fd->dma_para->fld_tree02_va[0] + fld_tree_size;
|
|
fd->dma_para->fld_tree02_pa[1] = fd->dma_para->fld_tree02_pa[0] + fld_tree_size;
|
|
fd->dma_para->fld_tree13_va[1] = fd->dma_para->fld_tree13_va[0] + fld_tree_size;
|
|
fd->dma_para->fld_tree13_pa[1] = fd->dma_para->fld_tree13_pa[0] + fld_tree_size;
|
|
|
|
for (input_index = 1; input_index < FLD_MAX_INPUT - 1; input_index++) {
|
|
fd->dma_para->fld_cv_va[input_index + 1] = fd->dma_para->fld_cv_va[input_index] +
|
|
fld_cv_size;
|
|
fd->dma_para->fld_cv_pa[input_index + 1] = fd->dma_para->fld_cv_pa[input_index] +
|
|
fld_cv_size;
|
|
fd->dma_para->fld_fp_va[input_index + 1] = fd->dma_para->fld_fp_va[input_index] +
|
|
fld_fp_size;
|
|
fd->dma_para->fld_fp_pa[input_index + 1] = fd->dma_para->fld_fp_pa[input_index] +
|
|
fld_fp_size;
|
|
fd->dma_para->fld_leafnode_va[input_index + 1] =
|
|
fd->dma_para->fld_leafnode_va[input_index] + fld_leafnode_size;
|
|
fd->dma_para->fld_leafnode_pa[input_index + 1] =
|
|
fd->dma_para->fld_leafnode_pa[input_index] + fld_leafnode_size;
|
|
fd->dma_para->fld_tree02_va[input_index + 1] =
|
|
fd->dma_para->fld_tree02_va[input_index] + fld_tree_size;
|
|
fd->dma_para->fld_tree02_pa[input_index + 1] =
|
|
fd->dma_para->fld_tree02_pa[input_index] + fld_tree_size;
|
|
fd->dma_para->fld_tree13_va[input_index + 1] =
|
|
fd->dma_para->fld_tree13_va[input_index] + fld_tree_size;
|
|
fd->dma_para->fld_tree13_pa[input_index + 1] =
|
|
fd->dma_para->fld_tree13_pa[input_index] + fld_tree_size;
|
|
}
|
|
//fp
|
|
msb_bit_0 = (fd->dma_para->fld_fp_pa[0] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_fp_pa[1] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_fp_pa[2] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_fp_pa[3] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_fp_pa[4] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_fp_pa[5] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_fp_pa[6] & 0xf00000000) >> 32;
|
|
msb_bit_7 = (fd->dma_para->fld_fp_pa[7] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24 | msb_bit_7 << 28;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_3_0_7_MSB);
|
|
|
|
msb_bit_0 = (fd->dma_para->fld_fp_pa[8] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_fp_pa[9] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_fp_pa[10] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_fp_pa[11] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_fp_pa[12] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_fp_pa[13] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_fp_pa[14] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_3_8_15_MSB);
|
|
|
|
//cv
|
|
msb_bit_0 = (fd->dma_para->fld_cv_pa[0] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_cv_pa[1] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_cv_pa[2] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_cv_pa[3] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_cv_pa[4] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_cv_pa[5] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_cv_pa[6] & 0xf00000000) >> 32;
|
|
msb_bit_7 = (fd->dma_para->fld_cv_pa[7] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24 | msb_bit_7 << 28;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_2_0_7_MSB);
|
|
|
|
msb_bit_0 = (fd->dma_para->fld_cv_pa[8] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_cv_pa[9] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_cv_pa[10] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_cv_pa[11] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_cv_pa[12] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_cv_pa[13] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_cv_pa[14] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_2_8_15_MSB);
|
|
|
|
//leafnode
|
|
msb_bit_0 = (fd->dma_para->fld_leafnode_pa[0] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_leafnode_pa[1] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_leafnode_pa[2] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_leafnode_pa[3] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_leafnode_pa[4] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_leafnode_pa[5] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_leafnode_pa[6] & 0xf00000000) >> 32;
|
|
msb_bit_7 = (fd->dma_para->fld_leafnode_pa[7] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24 | msb_bit_7 << 28;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_SH_IN_BASE_ADDR_0_7_MSB);
|
|
|
|
msb_bit_0 = (fd->dma_para->fld_leafnode_pa[8] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_leafnode_pa[9] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_leafnode_pa[10] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_leafnode_pa[11] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_leafnode_pa[12] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_leafnode_pa[13] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_leafnode_pa[14] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_SH_IN_BASE_ADDR_8_15_MSB);
|
|
|
|
//02tree
|
|
msb_bit_0 = (fd->dma_para->fld_tree02_pa[0] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_tree02_pa[1] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_tree02_pa[2] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_tree02_pa[3] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_tree02_pa[4] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_tree02_pa[5] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_tree02_pa[6] & 0xf00000000) >> 32;
|
|
msb_bit_7 = (fd->dma_para->fld_tree02_pa[7] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24 | msb_bit_7 << 28;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_0_0_7_MSB);
|
|
|
|
msb_bit_0 = (fd->dma_para->fld_tree02_pa[8] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_tree02_pa[9] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_tree02_pa[10] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_tree02_pa[11] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_tree02_pa[12] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_tree02_pa[13] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_tree02_pa[14] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_0_8_15_MSB);
|
|
|
|
//13tree
|
|
msb_bit_0 = (fd->dma_para->fld_tree13_pa[0] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_tree13_pa[1] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_tree13_pa[2] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_tree13_pa[3] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_tree13_pa[4] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_tree13_pa[5] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_tree13_pa[6] & 0xf00000000) >> 32;
|
|
msb_bit_7 = (fd->dma_para->fld_tree13_pa[7] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24 | msb_bit_7 << 28;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_1_0_7_MSB);
|
|
|
|
|
|
msb_bit_0 = (fd->dma_para->fld_tree13_pa[8] & 0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->dma_para->fld_tree13_pa[9] & 0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->dma_para->fld_tree13_pa[10] & 0xf00000000) >> 32;
|
|
msb_bit_3 = (fd->dma_para->fld_tree13_pa[11] & 0xf00000000) >> 32;
|
|
msb_bit_4 = (fd->dma_para->fld_tree13_pa[12] & 0xf00000000) >> 32;
|
|
msb_bit_5 = (fd->dma_para->fld_tree13_pa[13] & 0xf00000000) >> 32;
|
|
msb_bit_6 = (fd->dma_para->fld_tree13_pa[14] & 0xf00000000) >> 32;
|
|
|
|
set_msb_bit = msb_bit_0 | msb_bit_1 << 4 | msb_bit_2 << 8 | msb_bit_3 << 12
|
|
| msb_bit_4 << 16 | msb_bit_5 << 20 | msb_bit_6 << 24;
|
|
|
|
writel(set_msb_bit, fd->fd_base + FLD_PL_IN_BASE_ADDR_1_8_15_MSB);
|
|
|
|
}
|
|
#endif
|
|
static void aie_update_fddma_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
struct aie_static_info *pstv;
|
|
|
|
pstv = &fd->st_info;
|
|
|
|
/* 19~27 */
|
|
fd->dma_para->fd_out_hw_pa[19][0] =
|
|
fd->dma_para->fd_out_hw_pa[18][1] + pstv->fd_wdma_size[18][1];
|
|
fd->dma_para->fd_out_hw_pa[19][1] =
|
|
fd->dma_para->fd_out_hw_pa[19][0] + pstv->out_xsize_plus_1[19];
|
|
fd->dma_para->fd_out_hw_pa[20][0] = fd->dma_para->fd_out_hw_pa[19][0] +
|
|
2 * pstv->out_xsize_plus_1[20];
|
|
fd->dma_para->fd_out_hw_pa[20][1] = fd->dma_para->fd_out_hw_pa[19][0] +
|
|
3 * pstv->out_xsize_plus_1[20];
|
|
fd->dma_para->fd_out_hw_pa[21][0] = fd->dma_para->fd_out_hw_pa[19][0] +
|
|
4 * pstv->out_xsize_plus_1[21];
|
|
fd->dma_para->fd_out_hw_pa[22][0] =
|
|
fd->dma_para->fd_out_hw_pa[19][0] + pstv->fd_wdma_size[19][0] +
|
|
pstv->fd_wdma_size[19][1] + pstv->fd_wdma_size[20][0] +
|
|
pstv->fd_wdma_size[20][1] + pstv->fd_wdma_size[21][0];
|
|
fd->dma_para->fd_out_hw_pa[22][1] =
|
|
fd->dma_para->fd_out_hw_pa[22][0] + pstv->fd_wdma_size[22][0] +
|
|
pstv->fd_wdma_size[22][2] + pstv->fd_wdma_size[23][0] +
|
|
pstv->fd_wdma_size[23][2] + pstv->fd_wdma_size[24][0];
|
|
fd->dma_para->fd_out_hw_pa[22][2] =
|
|
fd->dma_para->fd_out_hw_pa[22][0] + pstv->out_xsize_plus_1[22];
|
|
fd->dma_para->fd_out_hw_pa[22][3] =
|
|
fd->dma_para->fd_out_hw_pa[22][1] + pstv->out_xsize_plus_1[22];
|
|
fd->dma_para->fd_out_hw_pa[23][0] = fd->dma_para->fd_out_hw_pa[22][0] +
|
|
2 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[23][1] = fd->dma_para->fd_out_hw_pa[22][1] +
|
|
2 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[23][2] = fd->dma_para->fd_out_hw_pa[22][0] +
|
|
3 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[23][3] = fd->dma_para->fd_out_hw_pa[22][1] +
|
|
3 * pstv->out_xsize_plus_1[23];
|
|
fd->dma_para->fd_out_hw_pa[24][0] = fd->dma_para->fd_out_hw_pa[22][0] +
|
|
4 * pstv->out_xsize_plus_1[24];
|
|
fd->dma_para->fd_out_hw_pa[24][1] = fd->dma_para->fd_out_hw_pa[22][1] +
|
|
4 * pstv->out_xsize_plus_1[24];
|
|
fd->dma_para->fd_out_hw_pa[25][0] =
|
|
fd->dma_para->fd_out_hw_pa[22][1] + pstv->fd_wdma_size[22][1] +
|
|
pstv->fd_wdma_size[22][3] + pstv->fd_wdma_size[23][1] +
|
|
pstv->fd_wdma_size[23][3] + pstv->fd_wdma_size[24][1];
|
|
fd->dma_para->fd_out_hw_pa[25][1] =
|
|
fd->dma_para->fd_out_hw_pa[25][0] + pstv->out_xsize_plus_1[25];
|
|
fd->dma_para->fd_out_hw_pa[26][0] = fd->dma_para->fd_out_hw_pa[25][0] +
|
|
2 * pstv->out_xsize_plus_1[26];
|
|
fd->dma_para->fd_out_hw_pa[26][1] = fd->dma_para->fd_out_hw_pa[25][0] +
|
|
3 * pstv->out_xsize_plus_1[26];
|
|
fd->dma_para->fd_out_hw_pa[27][0] = fd->dma_para->fd_out_hw_pa[25][0] +
|
|
4 * pstv->out_xsize_plus_1[27];
|
|
|
|
/* 48~56 */
|
|
fd->dma_para->fd_out_hw_pa[48][0] =
|
|
fd->dma_para->fd_out_hw_pa[47][1] + pstv->fd_wdma_size[47][1];
|
|
fd->dma_para->fd_out_hw_pa[48][1] =
|
|
fd->dma_para->fd_out_hw_pa[48][0] + pstv->out_xsize_plus_1[48];
|
|
fd->dma_para->fd_out_hw_pa[49][0] = fd->dma_para->fd_out_hw_pa[48][0] +
|
|
2 * pstv->out_xsize_plus_1[49];
|
|
fd->dma_para->fd_out_hw_pa[49][1] = fd->dma_para->fd_out_hw_pa[48][0] +
|
|
3 * pstv->out_xsize_plus_1[49];
|
|
fd->dma_para->fd_out_hw_pa[50][0] = fd->dma_para->fd_out_hw_pa[48][0] +
|
|
4 * pstv->out_xsize_plus_1[50];
|
|
fd->dma_para->fd_out_hw_pa[51][0] =
|
|
fd->dma_para->fd_out_hw_pa[48][0] + pstv->fd_wdma_size[48][0] +
|
|
pstv->fd_wdma_size[48][1] + pstv->fd_wdma_size[49][0] +
|
|
pstv->fd_wdma_size[49][1] + pstv->fd_wdma_size[50][0];
|
|
fd->dma_para->fd_out_hw_pa[51][1] =
|
|
fd->dma_para->fd_out_hw_pa[51][0] + pstv->fd_wdma_size[51][0] +
|
|
pstv->fd_wdma_size[51][2] + pstv->fd_wdma_size[52][0] +
|
|
pstv->fd_wdma_size[52][2] + pstv->fd_wdma_size[53][0];
|
|
fd->dma_para->fd_out_hw_pa[51][2] =
|
|
fd->dma_para->fd_out_hw_pa[51][0] + pstv->out_xsize_plus_1[51];
|
|
fd->dma_para->fd_out_hw_pa[51][3] =
|
|
fd->dma_para->fd_out_hw_pa[51][1] + pstv->out_xsize_plus_1[51];
|
|
fd->dma_para->fd_out_hw_pa[52][0] = fd->dma_para->fd_out_hw_pa[51][0] +
|
|
2 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[52][1] = fd->dma_para->fd_out_hw_pa[51][1] +
|
|
2 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[52][2] = fd->dma_para->fd_out_hw_pa[51][0] +
|
|
3 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[52][3] = fd->dma_para->fd_out_hw_pa[51][1] +
|
|
3 * pstv->out_xsize_plus_1[52];
|
|
fd->dma_para->fd_out_hw_pa[53][0] = fd->dma_para->fd_out_hw_pa[51][0] +
|
|
4 * pstv->out_xsize_plus_1[53];
|
|
fd->dma_para->fd_out_hw_pa[53][1] = fd->dma_para->fd_out_hw_pa[51][1] +
|
|
4 * pstv->out_xsize_plus_1[53];
|
|
fd->dma_para->fd_out_hw_pa[54][0] =
|
|
fd->dma_para->fd_out_hw_pa[51][1] + pstv->fd_wdma_size[51][1] +
|
|
pstv->fd_wdma_size[51][3] + pstv->fd_wdma_size[52][1] +
|
|
pstv->fd_wdma_size[52][3] + pstv->fd_wdma_size[53][1];
|
|
fd->dma_para->fd_out_hw_pa[54][1] =
|
|
fd->dma_para->fd_out_hw_pa[54][0] + pstv->out_xsize_plus_1[54];
|
|
fd->dma_para->fd_out_hw_pa[55][0] = fd->dma_para->fd_out_hw_pa[54][0] +
|
|
2 * pstv->out_xsize_plus_1[55];
|
|
fd->dma_para->fd_out_hw_pa[55][1] = fd->dma_para->fd_out_hw_pa[54][0] +
|
|
3 * pstv->out_xsize_plus_1[55];
|
|
fd->dma_para->fd_out_hw_pa[56][0] = fd->dma_para->fd_out_hw_pa[54][0] +
|
|
4 * pstv->out_xsize_plus_1[56];
|
|
/* 77~85 */
|
|
fd->dma_para->fd_out_hw_pa[77][0] =
|
|
fd->dma_para->fd_out_hw_pa[76][1] + pstv->fd_wdma_size[76][1];
|
|
fd->dma_para->fd_out_hw_pa[77][1] =
|
|
fd->dma_para->fd_out_hw_pa[77][0] + pstv->out_xsize_plus_1[77];
|
|
fd->dma_para->fd_out_hw_pa[78][0] = fd->dma_para->fd_out_hw_pa[77][0] +
|
|
2 * pstv->out_xsize_plus_1[78];
|
|
fd->dma_para->fd_out_hw_pa[78][1] = fd->dma_para->fd_out_hw_pa[77][0] +
|
|
3 * pstv->out_xsize_plus_1[78];
|
|
fd->dma_para->fd_out_hw_pa[79][0] = fd->dma_para->fd_out_hw_pa[77][0] +
|
|
4 * pstv->out_xsize_plus_1[79];
|
|
fd->dma_para->fd_out_hw_pa[80][0] =
|
|
fd->dma_para->fd_out_hw_pa[77][0] + pstv->fd_wdma_size[77][0] +
|
|
pstv->fd_wdma_size[77][1] + pstv->fd_wdma_size[78][0] +
|
|
pstv->fd_wdma_size[78][1] + pstv->fd_wdma_size[79][0];
|
|
fd->dma_para->fd_out_hw_pa[80][1] =
|
|
fd->dma_para->fd_out_hw_pa[80][0] + pstv->fd_wdma_size[80][0] +
|
|
pstv->fd_wdma_size[80][2] + pstv->fd_wdma_size[81][0] +
|
|
pstv->fd_wdma_size[81][2] + pstv->fd_wdma_size[82][0];
|
|
fd->dma_para->fd_out_hw_pa[80][2] =
|
|
fd->dma_para->fd_out_hw_pa[80][0] + pstv->out_xsize_plus_1[80];
|
|
fd->dma_para->fd_out_hw_pa[80][3] =
|
|
fd->dma_para->fd_out_hw_pa[80][1] + pstv->out_xsize_plus_1[80];
|
|
fd->dma_para->fd_out_hw_pa[81][0] = fd->dma_para->fd_out_hw_pa[80][0] +
|
|
2 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[81][1] = fd->dma_para->fd_out_hw_pa[80][1] +
|
|
2 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[81][2] = fd->dma_para->fd_out_hw_pa[80][0] +
|
|
3 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[81][3] = fd->dma_para->fd_out_hw_pa[80][1] +
|
|
3 * pstv->out_xsize_plus_1[81];
|
|
fd->dma_para->fd_out_hw_pa[82][0] = fd->dma_para->fd_out_hw_pa[80][0] +
|
|
4 * pstv->out_xsize_plus_1[82];
|
|
fd->dma_para->fd_out_hw_pa[82][1] = fd->dma_para->fd_out_hw_pa[80][1] +
|
|
4 * pstv->out_xsize_plus_1[82];
|
|
fd->dma_para->fd_out_hw_pa[83][0] =
|
|
fd->dma_para->fd_out_hw_pa[80][1] + pstv->fd_wdma_size[80][1] +
|
|
pstv->fd_wdma_size[80][3] + pstv->fd_wdma_size[81][1] +
|
|
pstv->fd_wdma_size[81][3] + pstv->fd_wdma_size[82][1];
|
|
fd->dma_para->fd_out_hw_pa[83][1] =
|
|
fd->dma_para->fd_out_hw_pa[83][0] + pstv->out_xsize_plus_1[83];
|
|
fd->dma_para->fd_out_hw_pa[84][0] = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
2 * pstv->out_xsize_plus_1[84];
|
|
fd->dma_para->fd_out_hw_pa[84][1] = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
3 * pstv->out_xsize_plus_1[84];
|
|
fd->dma_para->fd_out_hw_pa[85][0] = fd->dma_para->fd_out_hw_pa[83][0] +
|
|
4 * pstv->out_xsize_plus_1[85];
|
|
|
|
/* VA : except 28, 57, 86 */
|
|
/* 0~86 */
|
|
#if CHECK_SERVICE_0
|
|
fd->dma_para->fd_out_hw_va[0][0] = fd->fd_dma_hw.va;
|
|
for (i = 1; i < fd_loop_num; i++) {
|
|
if (i == rpn2_loop_num || i == rpn1_loop_num ||
|
|
i == rpn0_loop_num)
|
|
continue;
|
|
for (j = 0; j < 4; j++) {
|
|
if (fd_wdma_en[i][j]) {
|
|
fd->dma_para->fd_out_hw_va[i][j] =
|
|
fd->fd_dma_hw.va +
|
|
fd->dma_para->fd_out_hw_pa[i][j] -
|
|
fd->fd_dma_hw.pa;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
static void aie_free_sec_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
aie_free_iova(fd, &fd->rs_output_hw);
|
|
aie_free_dmabuf(fd, &fd->rs_output_hw);
|
|
}
|
|
|
|
static void aie_free_dram_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
//aie_imem_free(fd, &fd->rs_cfg_data);
|
|
aie_free_iova(fd, &fd->rs_cfg_data);
|
|
aie_free_va(fd, &fd->rs_cfg_data);
|
|
aie_free_dmabuf(fd, &fd->rs_cfg_data);
|
|
|
|
//aie_imem_free(fd, &fd->fd_cfg_data);
|
|
aie_free_iova(fd, &fd->fd_cfg_data);
|
|
aie_free_va(fd, &fd->fd_cfg_data);
|
|
aie_free_dmabuf(fd, &fd->fd_cfg_data);
|
|
|
|
//aie_imem_free(fd, &fd->yuv2rgb_cfg_data);
|
|
aie_free_iova(fd, &fd->yuv2rgb_cfg_data);
|
|
aie_free_va(fd, &fd->yuv2rgb_cfg_data);
|
|
aie_free_dmabuf(fd, &fd->yuv2rgb_cfg_data);
|
|
|
|
}
|
|
|
|
static void aie_free_output_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
aie_free_iova(fd, &fd->rs_output_hw);
|
|
aie_free_va(fd, &fd->rs_output_hw);
|
|
aie_free_dmabuf(fd, &fd->rs_output_hw);
|
|
|
|
}
|
|
|
|
static void aie_free_fddma_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
//aie_imem_free(fd, &fd->fd_dma_hw);
|
|
aie_free_iova(fd, &fd->fd_dma_hw);
|
|
aie_free_va(fd, &fd->fd_dma_hw);
|
|
aie_free_dmabuf(fd, &fd->fd_dma_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fd_kernel_hw);
|
|
aie_free_iova(fd, &fd->fd_kernel_hw);
|
|
aie_free_va(fd, &fd->fd_kernel_hw);
|
|
aie_free_dmabuf(fd, &fd->fd_kernel_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fd_attr_dma_hw);
|
|
aie_free_iova(fd, &fd->fd_attr_dma_hw);
|
|
aie_free_va(fd, &fd->fd_attr_dma_hw);
|
|
aie_free_dmabuf(fd, &fd->fd_attr_dma_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fd_dma_result_hw);
|
|
aie_free_iova(fd, &fd->fd_dma_result_hw);
|
|
aie_free_va(fd, &fd->fd_dma_result_hw);
|
|
aie_free_dmabuf(fd, &fd->fd_dma_result_hw);
|
|
|
|
}
|
|
#ifdef FLD
|
|
static void aie_free_fld_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
//aie_imem_free(fd, &fd->fld_blink_weight_hw);
|
|
aie_free_iova(fd, &fd->fld_blink_weight_hw);
|
|
aie_free_va(fd, &fd->fld_blink_weight_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_blink_weight_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fld_cv_hw);
|
|
aie_free_iova(fd, &fd->fld_cv_hw);
|
|
aie_free_va(fd, &fd->fld_cv_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_cv_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fld_fp_hw);
|
|
aie_free_iova(fd, &fd->fld_fp_hw);
|
|
aie_free_va(fd, &fd->fld_fp_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_fp_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fld_leafnode_hw);
|
|
aie_free_iova(fd, &fd->fld_leafnode_hw);
|
|
aie_free_va(fd, &fd->fld_leafnode_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_leafnode_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fld_tree_02_hw);
|
|
aie_free_iova(fd, &fd->fld_tree_02_hw);
|
|
aie_free_va(fd, &fd->fld_tree_02_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_tree_02_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fld_tree_13_hw);
|
|
aie_free_iova(fd, &fd->fld_tree_13_hw);
|
|
aie_free_va(fd, &fd->fld_tree_13_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_tree_13_hw);
|
|
|
|
//aie_imem_free(fd, &fd->fld_output_hw);
|
|
aie_free_iova(fd, &fd->fld_output_hw);
|
|
aie_free_va(fd, &fd->fld_output_hw);
|
|
aie_free_dmabuf(fd, &fd->fld_output_hw);
|
|
}
|
|
#endif
|
|
#if CHECK_SERVICE_0
|
|
static int aie_copy_fw(struct mtk_aie_dev *fd, const char *name, void *buf,
|
|
unsigned int size)
|
|
{
|
|
int ret = 0;
|
|
const struct firmware *fw = NULL;
|
|
|
|
ret = request_firmware(&fw, name, fd->dev);
|
|
if (ret) {
|
|
dev_info(fd->dev, "%s: fail to load firmware %s\n", __func__,
|
|
name);
|
|
return ret;
|
|
}
|
|
|
|
if (size < fw->size) {
|
|
release_firmware(fw);
|
|
return -EINVAL;
|
|
}
|
|
|
|
memcpy(buf, fw->data, fw->size);
|
|
release_firmware(fw);
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
static int aie_load_fw(struct mtk_aie_dev *fd)
|
|
{
|
|
int ret = 0;
|
|
int i = 0;
|
|
|
|
memcpy(fd->base_para->fd_fd_cfg_va, &fdvt_fd_confi_frame01[0], fd->fd_fd_cfg_size);
|
|
memcpy(fd->base_para->fd_rs_cfg_va, &fdvt_rs_confi_frame01[0], fd->fd_rs_cfg_size);
|
|
memcpy(fd->base_para->fd_yuv2rgb_cfg_va, &fdvt_yuv2rgb_confi_frame01[0],
|
|
fd->fd_yuv2rgb_cfg_size);
|
|
|
|
|
|
memcpy(fd->base_para->attr_fd_cfg_va[0], &attr_fd_confi_frame01[0], fd->attr_fd_cfg_size);
|
|
memcpy(fd->base_para->attr_yuv2rgb_cfg_va[0], &attr_yuv2rgb_confi_frame01[0],
|
|
fd->attr_yuv2rgb_cfg_size);
|
|
|
|
for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
|
|
memcpy(fd->base_para->attr_fd_cfg_va[i],
|
|
fd->base_para->attr_fd_cfg_va[0], fd->attr_fd_cfg_size);
|
|
memcpy(fd->base_para->attr_yuv2rgb_cfg_va[i],
|
|
fd->base_para->attr_yuv2rgb_cfg_va[0],
|
|
fd->attr_yuv2rgb_cfg_size);
|
|
}
|
|
|
|
/*0~10*/
|
|
memcpy(fd->dma_para->fd_kernel_va[0][0], &fdvt_kernel_bias_loop00_0_frame01[0],
|
|
fd_ker_rdma_size[0][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[0][1], &fdvt_kernel_bias_loop00_1_frame01[0],
|
|
fd_ker_rdma_size[0][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[1][0], &fdvt_kernel_bias_loop01_0_frame01[0],
|
|
fd_ker_rdma_size[1][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[1][1], &fdvt_kernel_bias_loop01_1_frame01[0],
|
|
fd_ker_rdma_size[1][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[2][0], &fdvt_kernel_bias_loop02_0_frame01[0],
|
|
fd_ker_rdma_size[2][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[2][1], &fdvt_kernel_bias_loop02_1_frame01[0],
|
|
fd_ker_rdma_size[2][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[3][0], &fdvt_kernel_bias_loop03_0_frame01[0],
|
|
fd_ker_rdma_size[3][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[3][1], &fdvt_kernel_bias_loop03_1_frame01[0],
|
|
fd_ker_rdma_size[3][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[4][0], &fdvt_kernel_bias_loop04_0_frame01[0],
|
|
fd_ker_rdma_size[4][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[4][1], &fdvt_kernel_bias_loop04_1_frame01[0],
|
|
fd_ker_rdma_size[4][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[5][0], &fdvt_kernel_bias_loop05_0_frame01[0],
|
|
fd_ker_rdma_size[5][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[5][1], &fdvt_kernel_bias_loop05_1_frame01[0],
|
|
fd_ker_rdma_size[5][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[6][0], &fdvt_kernel_bias_loop06_0_frame01[0],
|
|
fd_ker_rdma_size[6][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[6][1], &fdvt_kernel_bias_loop06_1_frame01[0],
|
|
fd_ker_rdma_size[6][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[7][0], &fdvt_kernel_bias_loop07_0_frame01[0],
|
|
fd_ker_rdma_size[7][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[7][1], &fdvt_kernel_bias_loop07_1_frame01[0],
|
|
fd_ker_rdma_size[7][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[8][0], &fdvt_kernel_bias_loop08_0_frame01[0],
|
|
fd_ker_rdma_size[8][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[8][1], &fdvt_kernel_bias_loop08_1_frame01[0],
|
|
fd_ker_rdma_size[8][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[9][0], &fdvt_kernel_bias_loop09_0_frame01[0],
|
|
fd_ker_rdma_size[9][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[9][1], &fdvt_kernel_bias_loop09_1_frame01[0],
|
|
fd_ker_rdma_size[9][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[10][0], &fdvt_kernel_bias_loop10_0_frame01[0],
|
|
fd_ker_rdma_size[10][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[10][1], &fdvt_kernel_bias_loop10_1_frame01[0],
|
|
fd_ker_rdma_size[10][1]);
|
|
|
|
/*11~20*/
|
|
memcpy(fd->dma_para->fd_kernel_va[11][0], &fdvt_kernel_bias_loop11_0_frame01[0],
|
|
fd_ker_rdma_size[11][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[11][1], &fdvt_kernel_bias_loop11_1_frame01[0],
|
|
fd_ker_rdma_size[11][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[12][0], &fdvt_kernel_bias_loop12_0_frame01[0],
|
|
fd_ker_rdma_size[12][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[12][1], &fdvt_kernel_bias_loop12_1_frame01[0],
|
|
fd_ker_rdma_size[12][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[13][0], &fdvt_kernel_bias_loop13_0_frame01[0],
|
|
fd_ker_rdma_size[13][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[13][1], &fdvt_kernel_bias_loop13_1_frame01[0],
|
|
fd_ker_rdma_size[13][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[14][0], &fdvt_kernel_bias_loop14_0_frame01[0],
|
|
fd_ker_rdma_size[14][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[14][1], &fdvt_kernel_bias_loop14_1_frame01[0],
|
|
fd_ker_rdma_size[14][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[15][0], &fdvt_kernel_bias_loop15_0_frame01[0],
|
|
fd_ker_rdma_size[15][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[15][1], &fdvt_kernel_bias_loop15_1_frame01[0],
|
|
fd_ker_rdma_size[15][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[16][0], &fdvt_kernel_bias_loop16_0_frame01[0],
|
|
fd_ker_rdma_size[16][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[16][1], &fdvt_kernel_bias_loop16_1_frame01[0],
|
|
fd_ker_rdma_size[16][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[17][0], &fdvt_kernel_bias_loop17_0_frame01[0],
|
|
fd_ker_rdma_size[17][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[17][1], &fdvt_kernel_bias_loop17_1_frame01,
|
|
fd_ker_rdma_size[17][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[18][0], &fdvt_kernel_bias_loop18_0_frame01[0],
|
|
fd_ker_rdma_size[18][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[18][1], &fdvt_kernel_bias_loop18_1_frame01[0],
|
|
fd_ker_rdma_size[18][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[19][0], &fdvt_kernel_bias_loop19_0_frame01[0],
|
|
fd_ker_rdma_size[19][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[19][1], &fdvt_kernel_bias_loop19_1_frame01[0],
|
|
fd_ker_rdma_size[19][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[20][0], &fdvt_kernel_bias_loop20_0_frame01[0],
|
|
fd_ker_rdma_size[20][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[20][1], &fdvt_kernel_bias_loop20_1_frame01[0],
|
|
fd_ker_rdma_size[20][1]);
|
|
|
|
/*21~30: except 28*/
|
|
memcpy(fd->dma_para->fd_kernel_va[21][0], &fdvt_kernel_bias_loop21_0_frame01[0],
|
|
fd_ker_rdma_size[21][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[21][1], &fdvt_kernel_bias_loop21_1_frame01[0],
|
|
fd_ker_rdma_size[21][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[22][0], &fdvt_kernel_bias_loop22_0_frame01[0],
|
|
fd_ker_rdma_size[22][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[22][1], &fdvt_kernel_bias_loop22_1_frame01[0],
|
|
fd_ker_rdma_size[22][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[23][0], &fdvt_kernel_bias_loop23_0_frame01[0],
|
|
fd_ker_rdma_size[23][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[23][1], &fdvt_kernel_bias_loop23_1_frame01[0],
|
|
fd_ker_rdma_size[23][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[24][0], &fdvt_kernel_bias_loop24_0_frame01[0],
|
|
fd_ker_rdma_size[24][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[24][1], &fdvt_kernel_bias_loop24_1_frame01[0],
|
|
fd_ker_rdma_size[24][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[25][0], &fdvt_kernel_bias_loop25_0_frame01[0],
|
|
fd_ker_rdma_size[25][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[25][1], &fdvt_kernel_bias_loop25_1_frame01[0],
|
|
fd_ker_rdma_size[25][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[26][0], &fdvt_kernel_bias_loop26_0_frame01[0],
|
|
fd_ker_rdma_size[26][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[26][1], &fdvt_kernel_bias_loop26_1_frame01[0],
|
|
fd_ker_rdma_size[26][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[27][0], &fdvt_kernel_bias_loop27_0_frame01[0],
|
|
fd_ker_rdma_size[27][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[27][1], &fdvt_kernel_bias_loop27_1_frame01[0],
|
|
fd_ker_rdma_size[27][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[29][0], &fdvt_kernel_bias_loop29_0_frame01[0],
|
|
fd_ker_rdma_size[29][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[29][1], &fdvt_kernel_bias_loop29_1_frame01[0],
|
|
fd_ker_rdma_size[29][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[30][0], &fdvt_kernel_bias_loop30_0_frame01[0],
|
|
fd_ker_rdma_size[30][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[30][1], &fdvt_kernel_bias_loop30_1_frame01[0],
|
|
fd_ker_rdma_size[30][1]);
|
|
|
|
/*31~40*/
|
|
memcpy(fd->dma_para->fd_kernel_va[31][0], &fdvt_kernel_bias_loop31_0_frame01[0],
|
|
fd_ker_rdma_size[31][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[31][1], &fdvt_kernel_bias_loop31_1_frame01[0],
|
|
fd_ker_rdma_size[31][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[32][0], &fdvt_kernel_bias_loop32_0_frame01[0],
|
|
fd_ker_rdma_size[32][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[32][1], &fdvt_kernel_bias_loop32_1_frame01[0],
|
|
fd_ker_rdma_size[32][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[33][0], &fdvt_kernel_bias_loop33_0_frame01[0],
|
|
fd_ker_rdma_size[33][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[33][1], &fdvt_kernel_bias_loop33_1_frame01[0],
|
|
fd_ker_rdma_size[33][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[34][0], &fdvt_kernel_bias_loop34_0_frame01[0],
|
|
fd_ker_rdma_size[34][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[34][1], &fdvt_kernel_bias_loop34_1_frame01[0],
|
|
fd_ker_rdma_size[34][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[35][0], &fdvt_kernel_bias_loop35_0_frame01[0],
|
|
fd_ker_rdma_size[35][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[35][1], &fdvt_kernel_bias_loop35_1_frame01[0],
|
|
fd_ker_rdma_size[35][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[36][0], &fdvt_kernel_bias_loop36_0_frame01[0],
|
|
fd_ker_rdma_size[36][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[36][1], &fdvt_kernel_bias_loop36_1_frame01[0],
|
|
fd_ker_rdma_size[36][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[37][0], &fdvt_kernel_bias_loop37_0_frame01[0],
|
|
fd_ker_rdma_size[37][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[37][1], &fdvt_kernel_bias_loop37_1_frame01[0],
|
|
fd_ker_rdma_size[37][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[38][0], &fdvt_kernel_bias_loop38_0_frame01[0],
|
|
fd_ker_rdma_size[38][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[38][1], &fdvt_kernel_bias_loop38_1_frame01[0],
|
|
fd_ker_rdma_size[38][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[39][0], &fdvt_kernel_bias_loop39_0_frame01[0],
|
|
fd_ker_rdma_size[39][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[39][1], &fdvt_kernel_bias_loop39_1_frame01[0],
|
|
fd_ker_rdma_size[39][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[40][0], &fdvt_kernel_bias_loop40_0_frame01[0],
|
|
fd_ker_rdma_size[40][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[40][1], &fdvt_kernel_bias_loop40_1_frame01[0],
|
|
fd_ker_rdma_size[40][1]);
|
|
|
|
/*41~50*/
|
|
memcpy(fd->dma_para->fd_kernel_va[41][0], &fdvt_kernel_bias_loop41_0_frame01[0],
|
|
fd_ker_rdma_size[41][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[41][1], &fdvt_kernel_bias_loop41_1_frame01[0],
|
|
fd_ker_rdma_size[41][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[42][0], &fdvt_kernel_bias_loop42_0_frame01[0],
|
|
fd_ker_rdma_size[42][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[42][1], &fdvt_kernel_bias_loop42_1_frame01[0],
|
|
fd_ker_rdma_size[42][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[43][0], &fdvt_kernel_bias_loop43_0_frame01[0],
|
|
fd_ker_rdma_size[43][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[43][1], &fdvt_kernel_bias_loop43_1_frame01[0],
|
|
fd_ker_rdma_size[43][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[44][0], &fdvt_kernel_bias_loop44_0_frame01[0],
|
|
fd_ker_rdma_size[44][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[44][1], &fdvt_kernel_bias_loop44_1_frame01[0],
|
|
fd_ker_rdma_size[44][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[45][0], &fdvt_kernel_bias_loop45_0_frame01[0],
|
|
fd_ker_rdma_size[45][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[45][1], &fdvt_kernel_bias_loop45_1_frame01[0],
|
|
fd_ker_rdma_size[45][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[46][0], &fdvt_kernel_bias_loop46_0_frame01[0],
|
|
fd_ker_rdma_size[46][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[46][1], &fdvt_kernel_bias_loop46_1_frame01[0],
|
|
fd_ker_rdma_size[46][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[47][0], &fdvt_kernel_bias_loop47_0_frame01[0],
|
|
fd_ker_rdma_size[47][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[47][1], &fdvt_kernel_bias_loop47_1_frame01[0],
|
|
fd_ker_rdma_size[47][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[48][0], &fdvt_kernel_bias_loop48_0_frame01[0],
|
|
fd_ker_rdma_size[48][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[48][1], &fdvt_kernel_bias_loop48_1_frame01[0],
|
|
fd_ker_rdma_size[48][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[49][0], &fdvt_kernel_bias_loop49_0_frame01[0],
|
|
fd_ker_rdma_size[49][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[49][1], &fdvt_kernel_bias_loop49_1_frame01[0],
|
|
fd_ker_rdma_size[49][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[50][0], &fdvt_kernel_bias_loop50_0_frame01[0],
|
|
fd_ker_rdma_size[50][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[50][1], &fdvt_kernel_bias_loop50_1_frame01[0],
|
|
fd_ker_rdma_size[50][1]);
|
|
|
|
/*51~60: except 57*/
|
|
memcpy(fd->dma_para->fd_kernel_va[51][0], &fdvt_kernel_bias_loop51_0_frame01[0],
|
|
fd_ker_rdma_size[51][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[51][1], &fdvt_kernel_bias_loop51_1_frame01[0],
|
|
fd_ker_rdma_size[51][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[52][0], &fdvt_kernel_bias_loop52_0_frame01[0],
|
|
fd_ker_rdma_size[52][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[52][1], &fdvt_kernel_bias_loop52_1_frame01[0],
|
|
fd_ker_rdma_size[52][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[53][0], &fdvt_kernel_bias_loop53_0_frame01[0],
|
|
fd_ker_rdma_size[53][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[53][1], &fdvt_kernel_bias_loop53_1_frame01[0],
|
|
fd_ker_rdma_size[53][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[54][0], &fdvt_kernel_bias_loop54_0_frame01[0],
|
|
fd_ker_rdma_size[54][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[54][1], &fdvt_kernel_bias_loop54_1_frame01[0],
|
|
fd_ker_rdma_size[54][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[55][0], &fdvt_kernel_bias_loop55_0_frame01[0],
|
|
fd_ker_rdma_size[55][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[55][1], &fdvt_kernel_bias_loop55_1_frame01[0],
|
|
fd_ker_rdma_size[55][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[56][0], &fdvt_kernel_bias_loop56_0_frame01[0],
|
|
fd_ker_rdma_size[56][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[56][1], &fdvt_kernel_bias_loop56_1_frame01[0],
|
|
fd_ker_rdma_size[56][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[58][0], &fdvt_kernel_bias_loop58_0_frame01[0],
|
|
fd_ker_rdma_size[58][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[58][1], &fdvt_kernel_bias_loop58_1_frame01[0],
|
|
fd_ker_rdma_size[58][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[59][0], &fdvt_kernel_bias_loop59_0_frame01[0],
|
|
fd_ker_rdma_size[59][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[59][1], &fdvt_kernel_bias_loop59_1_frame01[0],
|
|
fd_ker_rdma_size[59][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[60][0], &fdvt_kernel_bias_loop60_0_frame01[0],
|
|
fd_ker_rdma_size[60][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[60][1], &fdvt_kernel_bias_loop60_1_frame01[0],
|
|
fd_ker_rdma_size[60][1]);
|
|
|
|
/*61~70*/
|
|
memcpy(fd->dma_para->fd_kernel_va[61][0], &fdvt_kernel_bias_loop61_0_frame01[0],
|
|
fd_ker_rdma_size[61][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[61][1], &fdvt_kernel_bias_loop61_1_frame01[0],
|
|
fd_ker_rdma_size[61][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[62][0], &fdvt_kernel_bias_loop62_0_frame01[0],
|
|
fd_ker_rdma_size[62][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[62][1], &fdvt_kernel_bias_loop62_1_frame01[0],
|
|
fd_ker_rdma_size[62][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[63][0], &fdvt_kernel_bias_loop63_0_frame01[0],
|
|
fd_ker_rdma_size[63][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[63][1], &fdvt_kernel_bias_loop63_1_frame01[0],
|
|
fd_ker_rdma_size[63][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[64][0], &fdvt_kernel_bias_loop64_0_frame01[0],
|
|
fd_ker_rdma_size[64][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[64][1], &fdvt_kernel_bias_loop64_1_frame01[0],
|
|
fd_ker_rdma_size[64][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[65][0], &fdvt_kernel_bias_loop65_0_frame01[0],
|
|
fd_ker_rdma_size[65][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[65][1], &fdvt_kernel_bias_loop65_1_frame01[0],
|
|
fd_ker_rdma_size[65][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[66][0], &fdvt_kernel_bias_loop66_0_frame01[0],
|
|
fd_ker_rdma_size[66][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[66][1], &fdvt_kernel_bias_loop66_1_frame01[0],
|
|
fd_ker_rdma_size[66][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[67][0], &fdvt_kernel_bias_loop67_0_frame01[0],
|
|
fd_ker_rdma_size[67][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[67][1], &fdvt_kernel_bias_loop67_1_frame01[0],
|
|
fd_ker_rdma_size[67][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[68][0], &fdvt_kernel_bias_loop68_0_frame01[0],
|
|
fd_ker_rdma_size[68][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[68][1], &fdvt_kernel_bias_loop68_1_frame01[0],
|
|
fd_ker_rdma_size[68][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[69][0], &fdvt_kernel_bias_loop69_0_frame01[0],
|
|
fd_ker_rdma_size[69][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[69][1], &fdvt_kernel_bias_loop69_1_frame01[0],
|
|
fd_ker_rdma_size[69][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[70][0], &fdvt_kernel_bias_loop70_0_frame01[0],
|
|
fd_ker_rdma_size[70][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[70][1], &fdvt_kernel_bias_loop70_1_frame01[0],
|
|
fd_ker_rdma_size[70][1]);
|
|
|
|
/*71~80*/
|
|
memcpy(fd->dma_para->fd_kernel_va[71][0], &fdvt_kernel_bias_loop71_0_frame01[0],
|
|
fd_ker_rdma_size[71][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[71][1], &fdvt_kernel_bias_loop71_1_frame01[0],
|
|
fd_ker_rdma_size[71][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[72][0], &fdvt_kernel_bias_loop72_0_frame01[0],
|
|
fd_ker_rdma_size[72][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[72][1], &fdvt_kernel_bias_loop72_1_frame01[0],
|
|
fd_ker_rdma_size[72][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[73][0], &fdvt_kernel_bias_loop73_0_frame01[0],
|
|
fd_ker_rdma_size[73][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[73][1], &fdvt_kernel_bias_loop73_1_frame01[0],
|
|
fd_ker_rdma_size[73][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[74][0], &fdvt_kernel_bias_loop74_0_frame01[0],
|
|
fd_ker_rdma_size[74][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[74][1], &fdvt_kernel_bias_loop74_1_frame01[0],
|
|
fd_ker_rdma_size[74][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[75][0], &fdvt_kernel_bias_loop75_0_frame01[0],
|
|
fd_ker_rdma_size[75][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[75][1], &fdvt_kernel_bias_loop75_1_frame01[0],
|
|
fd_ker_rdma_size[75][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[76][0], &fdvt_kernel_bias_loop76_0_frame01[0],
|
|
fd_ker_rdma_size[76][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[76][1], &fdvt_kernel_bias_loop76_1_frame01[0],
|
|
fd_ker_rdma_size[76][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[77][0], &fdvt_kernel_bias_loop77_0_frame01[0],
|
|
fd_ker_rdma_size[77][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[77][1], &fdvt_kernel_bias_loop77_1_frame01[0],
|
|
fd_ker_rdma_size[77][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[78][0], &fdvt_kernel_bias_loop78_0_frame01[0],
|
|
fd_ker_rdma_size[78][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[78][1], &fdvt_kernel_bias_loop78_1_frame01[0],
|
|
fd_ker_rdma_size[78][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[79][0], &fdvt_kernel_bias_loop79_0_frame01[0],
|
|
fd_ker_rdma_size[79][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[79][1], &fdvt_kernel_bias_loop79_1_frame01[0],
|
|
fd_ker_rdma_size[79][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[80][0], &fdvt_kernel_bias_loop80_0_frame01[0],
|
|
fd_ker_rdma_size[80][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[80][1], &fdvt_kernel_bias_loop80_1_frame01[0],
|
|
fd_ker_rdma_size[80][1]);
|
|
|
|
/*81~85*/
|
|
memcpy(fd->dma_para->fd_kernel_va[81][0], &fdvt_kernel_bias_loop81_0_frame01[0],
|
|
fd_ker_rdma_size[81][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[81][1], &fdvt_kernel_bias_loop81_1_frame01[0],
|
|
fd_ker_rdma_size[81][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[82][0], &fdvt_kernel_bias_loop82_0_frame01[0],
|
|
fd_ker_rdma_size[82][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[82][1], &fdvt_kernel_bias_loop82_1_frame01[0],
|
|
fd_ker_rdma_size[82][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[83][0], &fdvt_kernel_bias_loop83_0_frame01[0],
|
|
fd_ker_rdma_size[83][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[83][1], &fdvt_kernel_bias_loop83_1_frame01[0],
|
|
fd_ker_rdma_size[83][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[84][0], &fdvt_kernel_bias_loop84_0_frame01[0],
|
|
fd_ker_rdma_size[84][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[84][1], &fdvt_kernel_bias_loop84_1_frame01[0],
|
|
fd_ker_rdma_size[84][1]);
|
|
|
|
memcpy(fd->dma_para->fd_kernel_va[85][0], &fdvt_kernel_bias_loop85_0_frame01[0],
|
|
fd_ker_rdma_size[85][0]);
|
|
memcpy(fd->dma_para->fd_kernel_va[85][1], &fdvt_kernel_bias_loop85_1_frame01[0],
|
|
fd_ker_rdma_size[85][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[0][0], &gender_kernel_bias_loop00_0_frame01[0],
|
|
attr_ker_rdma_size[0][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[0][1], &gender_kernel_bias_loop00_1_frame01[0],
|
|
attr_ker_rdma_size[0][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[1][0], &gender_kernel_bias_loop01_0_frame01[0],
|
|
attr_ker_rdma_size[1][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[1][1], &gender_kernel_bias_loop01_1_frame01[0],
|
|
attr_ker_rdma_size[1][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[2][0], &gender_kernel_bias_loop02_0_frame01[0],
|
|
attr_ker_rdma_size[2][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[2][1], &gender_kernel_bias_loop02_1_frame01[0],
|
|
attr_ker_rdma_size[2][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[3][0], &gender_kernel_bias_loop03_0_frame01[0],
|
|
attr_ker_rdma_size[3][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[3][1], &gender_kernel_bias_loop03_1_frame01[0],
|
|
attr_ker_rdma_size[3][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[4][0], &gender_kernel_bias_loop04_0_frame01[0],
|
|
attr_ker_rdma_size[4][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[4][1], &gender_kernel_bias_loop04_1_frame01[0],
|
|
attr_ker_rdma_size[4][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[5][0], &gender_kernel_bias_loop05_0_frame01[0],
|
|
attr_ker_rdma_size[5][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[5][1], &gender_kernel_bias_loop05_1_frame01[0],
|
|
attr_ker_rdma_size[5][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[6][0], &gender_kernel_bias_loop06_0_frame01[0],
|
|
attr_ker_rdma_size[6][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[6][1], &gender_kernel_bias_loop06_1_frame01[0],
|
|
attr_ker_rdma_size[6][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[7][0], &gender_kernel_bias_loop07_0_frame01[0],
|
|
attr_ker_rdma_size[7][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[7][1], &gender_kernel_bias_loop07_1_frame01[0],
|
|
attr_ker_rdma_size[7][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[8][0], &gender_kernel_bias_loop08_0_frame01[0],
|
|
attr_ker_rdma_size[8][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[8][1], &gender_kernel_bias_loop08_1_frame01[0],
|
|
attr_ker_rdma_size[8][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[9][0], &gender_kernel_bias_loop09_0_frame01[0],
|
|
attr_ker_rdma_size[9][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[9][1], &gender_kernel_bias_loop09_1_frame01[0],
|
|
attr_ker_rdma_size[9][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[10][0], &gender_kernel_bias_loop10_0_frame01[0],
|
|
attr_ker_rdma_size[10][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[10][1], &gender_kernel_bias_loop10_1_frame01[0],
|
|
attr_ker_rdma_size[10][1]);
|
|
|
|
/*11~20*/
|
|
memcpy(fd->dma_para->attr_kernel_va[11][0], &gender_kernel_bias_loop11_0_frame01[0],
|
|
attr_ker_rdma_size[11][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[11][1], &gender_kernel_bias_loop11_1_frame01[0],
|
|
attr_ker_rdma_size[11][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[12][0], &gender_kernel_bias_loop12_0_frame01[0],
|
|
attr_ker_rdma_size[12][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[12][1], &gender_kernel_bias_loop12_1_frame01[0],
|
|
attr_ker_rdma_size[12][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[13][0], &gender_kernel_bias_loop13_0_frame01[0],
|
|
attr_ker_rdma_size[13][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[13][1], &gender_kernel_bias_loop13_1_frame01[0],
|
|
attr_ker_rdma_size[13][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[14][0], &gender_kernel_bias_loop14_0_frame01[0],
|
|
attr_ker_rdma_size[14][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[14][1], &gender_kernel_bias_loop14_1_frame01[0],
|
|
attr_ker_rdma_size[14][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[15][0], &gender_kernel_bias_loop15_0_frame01[0],
|
|
attr_ker_rdma_size[15][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[15][1], &gender_kernel_bias_loop15_1_frame01[0],
|
|
attr_ker_rdma_size[15][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[16][0], &gender_kernel_bias_loop16_0_frame01[0],
|
|
attr_ker_rdma_size[16][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[16][1], &gender_kernel_bias_loop16_1_frame01[0],
|
|
attr_ker_rdma_size[16][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[17][0], &gender_kernel_bias_loop17_0_frame01[0],
|
|
attr_ker_rdma_size[17][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[17][1], &gender_kernel_bias_loop17_1_frame01[0],
|
|
attr_ker_rdma_size[17][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[18][0], &gender_kernel_bias_loop18_0_frame01[0],
|
|
attr_ker_rdma_size[18][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[18][1], &gender_kernel_bias_loop18_1_frame01[0],
|
|
attr_ker_rdma_size[18][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[19][0], &gender_kernel_bias_loop19_0_frame01[0],
|
|
attr_ker_rdma_size[19][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[19][1], &gender_kernel_bias_loop19_1_frame01[0],
|
|
attr_ker_rdma_size[19][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[20][0], &gender_kernel_bias_loop20_0_frame01[0],
|
|
attr_ker_rdma_size[20][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[20][1], &gender_kernel_bias_loop20_1_frame01[0],
|
|
attr_ker_rdma_size[20][1]);
|
|
|
|
/*21~30: except 28*/
|
|
memcpy(fd->dma_para->attr_kernel_va[21][0], &gender_kernel_bias_loop21_0_frame01[0],
|
|
attr_ker_rdma_size[21][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[21][1], &gender_kernel_bias_loop21_1_frame01[0],
|
|
attr_ker_rdma_size[21][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[22][0], &gender_kernel_bias_loop22_0_frame01[0],
|
|
attr_ker_rdma_size[22][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[22][1], &gender_kernel_bias_loop22_1_frame01[0],
|
|
attr_ker_rdma_size[22][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[23][0], &gender_kernel_bias_loop23_0_frame01[0],
|
|
attr_ker_rdma_size[23][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[23][1], &gender_kernel_bias_loop23_1_frame01[0],
|
|
attr_ker_rdma_size[23][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[24][0], &gender_kernel_bias_loop24_0_frame01[0],
|
|
attr_ker_rdma_size[24][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[24][1], &gender_kernel_bias_loop24_1_frame01[0],
|
|
attr_ker_rdma_size[24][1]);
|
|
|
|
memcpy(fd->dma_para->attr_kernel_va[25][0], &gender_kernel_bias_loop25_0_frame01[0],
|
|
attr_ker_rdma_size[25][0]);
|
|
memcpy(fd->dma_para->attr_kernel_va[25][1], &gender_kernel_bias_loop25_1_frame01[0],
|
|
attr_ker_rdma_size[25][1]);
|
|
|
|
|
|
memcpy(fd->dma_para->fld_blink_weight_va, &fdvt_fld_blink_weight_forest14[0],
|
|
fld_blink_weight_size_non_align);
|
|
memcpy(fd->dma_para->fld_cv_va[0], &fdvt_fld_cv_forest00_iom3, fld_cv_size_00_non_align);
|
|
memcpy(fd->dma_para->fld_cv_va[1], &fdvt_fld_cv_forest01_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[2], &fdvt_fld_cv_forest02_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[3], &fdvt_fld_cv_forest03_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[4], &fdvt_fld_cv_forest04_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[5], &fdvt_fld_cv_forest05_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[6], &fdvt_fld_cv_forest06_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[7], &fdvt_fld_cv_forest07_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[8], &fdvt_fld_cv_forest08_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[9], &fdvt_fld_cv_forest09_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[10], &fdvt_fld_cv_forest10_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[11], &fdvt_fld_cv_forest11_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[12], &fdvt_fld_cv_forest12_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[13], &fdvt_fld_cv_forest13_iom3, fld_cv_size);
|
|
memcpy(fd->dma_para->fld_cv_va[14], &fdvt_fld_cv_forest14_iom3, fld_cv_size);
|
|
|
|
memcpy(fd->dma_para->fld_fp_va[0], &fdvt_fld_fp_forest00_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[1], &fdvt_fld_fp_forest01_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[2], &fdvt_fld_fp_forest02_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[3], &fdvt_fld_fp_forest03_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[4], &fdvt_fld_fp_forest04_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[5], &fdvt_fld_fp_forest05_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[6], &fdvt_fld_fp_forest06_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[7], &fdvt_fld_fp_forest07_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[8], &fdvt_fld_fp_forest08_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[9], &fdvt_fld_fp_forest09_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[10], &fdvt_fld_fp_forest10_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[11], &fdvt_fld_fp_forest11_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[12], &fdvt_fld_fp_forest12_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[13], &fdvt_fld_fp_forest13_om45, fld_fp_size_non_align);
|
|
memcpy(fd->dma_para->fld_fp_va[14], &fdvt_fld_fp_forest14_om45, fld_fp_size_non_align);
|
|
|
|
memcpy(fd->dma_para->fld_leafnode_va[0], &fdvt_fld_leafnode_forest00, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[1], &fdvt_fld_leafnode_forest01, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[2], &fdvt_fld_leafnode_forest02, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[3], &fdvt_fld_leafnode_forest03, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[4], &fdvt_fld_leafnode_forest04, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[5], &fdvt_fld_leafnode_forest05, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[6], &fdvt_fld_leafnode_forest06, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[7], &fdvt_fld_leafnode_forest07, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[8], &fdvt_fld_leafnode_forest08, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[9], &fdvt_fld_leafnode_forest09, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[10], &fdvt_fld_leafnode_forest10, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[11], &fdvt_fld_leafnode_forest11, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[12], &fdvt_fld_leafnode_forest12, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[13], &fdvt_fld_leafnode_forest13, fld_leafnode_size);
|
|
memcpy(fd->dma_para->fld_leafnode_va[14], &fdvt_fld_leafnode_forest14, fld_leafnode_size);
|
|
|
|
memcpy(fd->dma_para->fld_tree13_va[0], &fdvt_fld_tree_forest00_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[1], &fdvt_fld_tree_forest01_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[2], &fdvt_fld_tree_forest02_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[3], &fdvt_fld_tree_forest03_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[4], &fdvt_fld_tree_forest04_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[5], &fdvt_fld_tree_forest05_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[6], &fdvt_fld_tree_forest06_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[7], &fdvt_fld_tree_forest07_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[8], &fdvt_fld_tree_forest08_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[9], &fdvt_fld_tree_forest09_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[10], &fdvt_fld_tree_forest10_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[11], &fdvt_fld_tree_forest11_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[12], &fdvt_fld_tree_forest12_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[13], &fdvt_fld_tree_forest13_km13,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree13_va[14], &fdvt_fld_tree_forest14_km13,
|
|
fld_tree_size_non_align);
|
|
|
|
memcpy(fd->dma_para->fld_tree02_va[0], &fdvt_fld_tree_forest00_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[1], &fdvt_fld_tree_forest01_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[2], &fdvt_fld_tree_forest02_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[3], &fdvt_fld_tree_forest03_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[4], &fdvt_fld_tree_forest04_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[5], &fdvt_fld_tree_forest05_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[6], &fdvt_fld_tree_forest06_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[7], &fdvt_fld_tree_forest07_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[8], &fdvt_fld_tree_forest08_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[9], &fdvt_fld_tree_forest09_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[10], &fdvt_fld_tree_forest10_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[11], &fdvt_fld_tree_forest11_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[12], &fdvt_fld_tree_forest12_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[13], &fdvt_fld_tree_forest13_km02,
|
|
fld_tree_size_non_align);
|
|
memcpy(fd->dma_para->fld_tree02_va[14], &fdvt_fld_tree_forest14_km02,
|
|
fld_tree_size_non_align);
|
|
|
|
#if CHECK_SERVICE_0
|
|
u8 i, j;
|
|
int ret;
|
|
char name[128];
|
|
char *sel_folder;
|
|
char *mp_folder = "aie_mp_fw";
|
|
|
|
sel_folder = mp_folder;
|
|
|
|
ret = sprintf(name, "%s/config/aie_fd_fd_config.bin", sel_folder);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->base_para->fd_fd_cfg_va,
|
|
fd->fd_fd_cfg_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = sprintf(name, "%s/config/aie_fd_rs_config.bin", sel_folder);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->base_para->fd_rs_cfg_va,
|
|
fd->fd_rs_cfg_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = sprintf(name, "%s/config/aie_fd_yuv2rgb_config.bin", sel_folder);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->base_para->fd_yuv2rgb_cfg_va,
|
|
fd->fd_yuv2rgb_cfg_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = sprintf(name, "%s/config/aie_attr_fd_config.bin", sel_folder);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->base_para->attr_fd_cfg_va[0],
|
|
fd->attr_fd_cfg_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = sprintf(name, "%s/config/aie_attr_yuv2rgb_config.bin", sel_folder);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->base_para->attr_yuv2rgb_cfg_va[0],
|
|
fd->attr_yuv2rgb_cfg_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
|
|
memcpy(fd->base_para->attr_fd_cfg_va[i],
|
|
fd->base_para->attr_fd_cfg_va[0], fd->attr_fd_cfg_size);
|
|
memcpy(fd->base_para->attr_yuv2rgb_cfg_va[i],
|
|
fd->base_para->attr_yuv2rgb_cfg_va[0],
|
|
fd->attr_yuv2rgb_cfg_size);
|
|
}
|
|
|
|
for (i = 0; i < fd_loop_num; i++) {
|
|
for (j = 0; j < kernel_RDMA_RA_num; j++) {
|
|
if (fd_ker_rdma_size[i][j]) {
|
|
ret = sprintf(name,
|
|
"%s/kernel/aie_fd_kernel_bias_loop%02d_%d.bin",
|
|
sel_folder, i, j);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(
|
|
fd, name,
|
|
fd->dma_para->fd_kernel_va[i][j],
|
|
fd_ker_rdma_size[i][j]);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < attr_loop_num; i++) {
|
|
for (j = 0; j < kernel_RDMA_RA_num; j++) {
|
|
ret = sprintf(name,
|
|
"%s/kernel/aie_attr_kernel_bias_loop%02d_%d.bin",
|
|
sel_folder, i, j);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name,
|
|
fd->dma_para->attr_kernel_va[i][j],
|
|
attr_ker_rdma_size[i][j]);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
}
|
|
#ifdef FLD
|
|
ret = sprintf(name, "%s/fldmodel/aie_fld_blink_weight_forest14.bin", sel_folder);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->dma_para->fld_blink_weight_va,
|
|
fld_blink_weight_size);
|
|
|
|
for (i = 0; i < FLD_MAX_INPUT; i++) {
|
|
/*cv forest*/
|
|
ret = sprintf(name, "%s/fldmodel/aie_fld_cv_forest%02d_iom3.bin", sel_folder, i);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->dma_para->fld_cv_va[i], fld_cv_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*leafnode forest*/
|
|
ret = sprintf(name, "%s/fldmodel/aie_fld_leafnode_forest%02d.bin", sel_folder, i);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->dma_para->fld_leafnode_va[i], fld_leafnode_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*fp forest*/
|
|
ret = sprintf(name, "%s/fldmodel/aie_fld_fp_forest%02d_om45.bin", sel_folder, i);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->dma_para->fld_fp_va[i], fld_fp_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*tree forest13*/
|
|
ret = sprintf(name, "%s/fldmodel/aie_fld_tree_forest%02d_km13.bin", sel_folder, i);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->dma_para->fld_tree13_va[i], fld_tree_size);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*tree forest02*/
|
|
ret = sprintf(name, "%s/fldmodel/aie_fld_tree_forest%02d_km02.bin", sel_folder, i);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = aie_copy_fw(fd, name, fd->dma_para->fld_tree02_va[i], fld_tree_size);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
#if CHECK_SERVICE_0
|
|
static void aie_reset_output_buf(struct mtk_aie_dev *fd,
|
|
struct aie_enq_info *aie_cfg)
|
|
{
|
|
if (aie_cfg->sel_mode == 0) {
|
|
memset(fd->rs_output_hw.va, 0, fd->rs_output_hw.size);
|
|
|
|
memset(fd->dma_para->fd_out_hw_va[rpn0_loop_num][0], 0,
|
|
result_size);
|
|
memset(fd->dma_para->fd_out_hw_va[rpn1_loop_num][0], 0,
|
|
result_size);
|
|
memset(fd->dma_para->fd_out_hw_va[rpn2_loop_num][0], 0,
|
|
result_size);
|
|
} else if (aie_cfg->sel_mode == 1) {
|
|
memset(fd->base_para->rs_pym_rst_va[0][0], 0,
|
|
fd->rs_pym_out_size[0]);
|
|
memset(fd->base_para->rs_pym_rst_va[0][1], 0,
|
|
fd->rs_pym_out_size[0]);
|
|
memset(fd->base_para->rs_pym_rst_va[0][2], 0,
|
|
fd->rs_pym_out_size[0]);
|
|
}
|
|
}
|
|
#endif
|
|
static int aie_update_cfg(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
int crop_width;
|
|
int crop_height;
|
|
|
|
crop_width = aie_cfg->src_img_width;
|
|
crop_height = aie_cfg->src_img_height;
|
|
|
|
if (aie_cfg->en_roi) {
|
|
crop_width = aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1 + 1;
|
|
crop_height = aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1 + 1;
|
|
}
|
|
|
|
if (crop_width == 0 || crop_height == 0) {
|
|
dev_info(fd->dev, "AIE error:crop size is wrong");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (aie_cfg->en_padding) {
|
|
crop_width = crop_width + aie_cfg->src_padding.right +
|
|
aie_cfg->src_padding.left;
|
|
crop_height = crop_height + aie_cfg->src_padding.up +
|
|
aie_cfg->src_padding.down;
|
|
}
|
|
|
|
if (aie_cfg->sel_mode == 0) {
|
|
fd->base_para->sel_mode = aie_cfg->sel_mode;
|
|
fd->base_para->crop_width = crop_width;
|
|
fd->base_para->crop_height = crop_height;
|
|
fd->base_para->src_img_addr = aie_cfg->src_img_addr;
|
|
fd->base_para->src_img_addr_uv = aie_cfg->src_img_addr_uv;
|
|
fd->base_para->img_width = aie_cfg->src_img_width;
|
|
fd->base_para->img_height = aie_cfg->src_img_height;
|
|
fd->base_para->src_img_fmt = aie_cfg->src_img_fmt;
|
|
fd->base_para->rotate_degree = aie_cfg->rotate_degree;
|
|
} else if (aie_cfg->sel_mode == 1) {
|
|
fd->attr_para->sel_mode[fd->attr_para->w_idx] =
|
|
aie_cfg->sel_mode;
|
|
fd->attr_para->crop_width[fd->attr_para->w_idx] = crop_width;
|
|
fd->attr_para->crop_height[fd->attr_para->w_idx] = crop_height;
|
|
fd->attr_para->src_img_addr[fd->attr_para->w_idx] =
|
|
aie_cfg->src_img_addr;
|
|
fd->attr_para->src_img_addr_uv[fd->attr_para->w_idx] =
|
|
aie_cfg->src_img_addr_uv;
|
|
fd->attr_para->img_width[fd->attr_para->w_idx] =
|
|
aie_cfg->src_img_width;
|
|
fd->attr_para->img_height[fd->attr_para->w_idx] =
|
|
aie_cfg->src_img_height;
|
|
fd->attr_para->src_img_fmt[fd->attr_para->w_idx] =
|
|
aie_cfg->src_img_fmt;
|
|
fd->attr_para->rotate_degree[fd->attr_para->w_idx] =
|
|
aie_cfg->rotate_degree;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * aie_combine_u16() - pack two 16-bit values into one 32-bit word.
 * @low:  value placed in bits 15..0
 * @high: value placed in bits 31..16
 *
 * Return: the packed 32-bit word.
 */
static u32 aie_combine_u16(u16 low, u16 high)
{
	u32 packed = high;

	packed <<= 16;
	packed |= low;

	return packed;
}
|
|
|
|
/*
 * aie_combine_stride() - pack a 16-bit value above a 4-bit field.
 * @low:  only its lowest four bits are kept, placed in bits 3..0
 * @high: value placed in bits 31..16
 *
 * Bits 15..4 of the result are always zero.
 *
 * Return: the packed 32-bit word.
 */
static u32 aie_combine_stride(u16 low, u16 high)
{
	const u32 upper = (u32)high << 16;
	const u32 lower = low & 0x000F;

	return upper | lower;
}
|
|
|
|
static int aie_config_y2r(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg,
|
|
int mode)
|
|
{
|
|
u32 img_addr = 0;
|
|
u32 img_addr_UV = 0;
|
|
u32 img_off = 0;
|
|
u32 img_off_uv = 0;
|
|
u32 *yuv2rgb_cfg = NULL;
|
|
u32 srcbuf = 0, srcbuf_UV = 0;
|
|
u16 xmag_0 = 0, ymag_0 = 0;
|
|
u16 pym0_out_w = 0;
|
|
u16 pym0_out_h = 0;
|
|
u16 stride_pym0_out_w = 0;
|
|
u16 src_crop_w = 0;
|
|
u16 src_crop_h = 0;
|
|
unsigned int msb_bit_0 = 0, msb_bit_1 = 0, msb_bit_2 = 0;
|
|
|
|
|
|
if (aie_cfg->en_roi == false) {
|
|
img_off = 0;
|
|
img_off_uv = 0;
|
|
} else {
|
|
if (aie_cfg->src_img_fmt == FMT_MONO ||
|
|
aie_cfg->src_img_fmt == FMT_YUV_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YVU_2P) {
|
|
img_off =
|
|
aie_cfg->src_img_stride * aie_cfg->src_roi.y1 +
|
|
aie_cfg->src_roi.x1;
|
|
img_off_uv =
|
|
aie_cfg->src_img_stride * aie_cfg->src_roi.y1 +
|
|
aie_cfg->src_roi.x1;
|
|
} else if (aie_cfg->src_img_fmt == FMT_YUV420_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YUV420_1P) {
|
|
img_off =
|
|
aie_cfg->src_img_stride * aie_cfg->src_roi.y1 +
|
|
aie_cfg->src_roi.x1;
|
|
img_off_uv = aie_cfg->src_img_stride *
|
|
aie_cfg->src_roi.y1 / 2 +
|
|
aie_cfg->src_roi.x1;
|
|
} else if (aie_cfg->src_img_fmt == FMT_YUYV ||
|
|
aie_cfg->src_img_fmt == FMT_YVYU ||
|
|
aie_cfg->src_img_fmt == FMT_UYVY ||
|
|
aie_cfg->src_img_fmt == FMT_VYUY) {
|
|
img_off =
|
|
aie_cfg->src_img_stride * aie_cfg->src_roi.y1 +
|
|
aie_cfg->src_roi.x1 * 2;
|
|
img_off_uv =
|
|
aie_cfg->src_img_stride * aie_cfg->src_roi.y1 +
|
|
aie_cfg->src_roi.x1 * 2;
|
|
} else {
|
|
dev_info(fd->dev,
|
|
"AIE error: Unsupport input format %d",
|
|
aie_cfg->src_img_fmt);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
img_addr = aie_cfg->src_img_addr + img_off;
|
|
img_addr_UV = aie_cfg->src_img_addr_uv + img_off_uv;
|
|
|
|
srcbuf = img_addr;
|
|
if (aie_cfg->src_img_fmt == FMT_YUV420_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YUV420_1P ||
|
|
aie_cfg->src_img_fmt == FMT_YUV_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YVU_2P)
|
|
srcbuf_UV = img_addr_UV;
|
|
else
|
|
srcbuf_UV = 0;
|
|
|
|
if (mode == 0) {
|
|
src_crop_w = fd->base_para->crop_width;
|
|
src_crop_h = fd->base_para->crop_height;
|
|
yuv2rgb_cfg = (u32 *)fd->base_para->fd_yuv2rgb_cfg_va;
|
|
pym0_out_w = fd->base_para->pyramid_width;
|
|
} else if (mode == 1) {
|
|
src_crop_w = fd->attr_para->crop_width[fd->attr_para->w_idx];
|
|
src_crop_h = fd->attr_para->crop_height[fd->attr_para->w_idx];
|
|
yuv2rgb_cfg =
|
|
(u32 *)fd->base_para
|
|
->attr_yuv2rgb_cfg_va[fd->attr_para->w_idx];
|
|
pym0_out_w = ATTR_MODE_PYRAMID_WIDTH;
|
|
}
|
|
|
|
pym0_out_h = pym0_out_w * src_crop_h / src_crop_w;
|
|
|
|
if (pym0_out_w != 0) {
|
|
xmag_0 = 512 * src_crop_w / pym0_out_w;
|
|
ymag_0 = xmag_0;
|
|
} else {
|
|
xmag_0 = 0;
|
|
ymag_0 = 0;
|
|
}
|
|
|
|
yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] =
|
|
(yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] & 0xFFFFFFF8) |
|
|
((aie_cfg->src_img_fmt) & 0x7);
|
|
if (aie_cfg->src_img_fmt == FMT_YUV420_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YUV420_1P) { /* for match patten */
|
|
yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] =
|
|
(yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] & 0xFFFFFFF8) |
|
|
((0x3) & 0x7);
|
|
}
|
|
yuv2rgb_cfg[Y2R_IN_W_H] = (yuv2rgb_cfg[Y2R_IN_W_H] & 0xF800F800) |
|
|
((src_crop_w << 16) & 0x7FF0000) |
|
|
(src_crop_h & 0x7FF);
|
|
yuv2rgb_cfg[Y2R_OUT_W_H] = (yuv2rgb_cfg[Y2R_OUT_W_H] & 0xF800F800) |
|
|
((pym0_out_w << 16) & 0x7FF0000) |
|
|
(pym0_out_h & 0x7FF);
|
|
|
|
if (aie_cfg->src_img_fmt == FMT_YUV_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YVU_2P) { /* 2 plane */
|
|
yuv2rgb_cfg[Y2R_RA0_RA1_EN] =
|
|
(yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x11;
|
|
if (aie_cfg->en_roi) {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_combine_u16(
|
|
aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] =
|
|
aie_combine_u16(src_crop_w - 1, src_crop_h - 1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] =
|
|
aie_combine_u16(src_crop_w - 1, src_crop_h - 1);
|
|
}
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x1;
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x1;
|
|
} else if (aie_cfg->src_img_fmt == FMT_MONO) {
|
|
yuv2rgb_cfg[Y2R_RA0_RA1_EN] =
|
|
(yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x01;
|
|
if (aie_cfg->en_roi) {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_combine_u16(
|
|
aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] =
|
|
aie_combine_u16(src_crop_w - 1, src_crop_h - 1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] =
|
|
aie_combine_u16(src_crop_w - 1, src_crop_h - 1);
|
|
}
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
|
|
} else if (aie_cfg->src_img_fmt == FMT_YUYV ||
|
|
aie_cfg->src_img_fmt == FMT_YVYU ||
|
|
aie_cfg->src_img_fmt == FMT_UYVY ||
|
|
aie_cfg->src_img_fmt == FMT_VYUY) { /* 1 plane */
|
|
yuv2rgb_cfg[Y2R_RA0_RA1_EN] =
|
|
(yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x1;
|
|
if (aie_cfg->en_roi) {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_combine_u16(
|
|
2 * (aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1 +
|
|
1) -
|
|
1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
2 * (aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1 +
|
|
1) -
|
|
1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_combine_u16(
|
|
2 * src_crop_w - 1, src_crop_h - 1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
2 * src_crop_w - 1, src_crop_h - 1);
|
|
}
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x3;
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x3;
|
|
}
|
|
|
|
/* AIE3.0 */
|
|
if (aie_cfg->src_img_fmt == FMT_YUV420_2P ||
|
|
aie_cfg->src_img_fmt == FMT_YUV420_1P) {
|
|
yuv2rgb_cfg[Y2R_RA0_RA1_EN] =
|
|
(yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x11;
|
|
if (aie_cfg->en_roi) {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_combine_u16(
|
|
aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1,
|
|
aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1,
|
|
(aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1) /
|
|
2);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] =
|
|
aie_combine_u16(src_crop_w - 1, src_crop_h - 1);
|
|
yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
src_crop_w - 1, src_crop_h / 2 - 1);
|
|
}
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
|
|
yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
|
|
(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
|
|
((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
|
|
|
|
yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] =
|
|
(yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] & 0xFFFFFFFE) | 0x01;
|
|
if (aie_cfg->en_roi) {
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_X] = aie_combine_u16(
|
|
0, aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1);
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_Y] = aie_combine_u16(
|
|
0, aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_X] =
|
|
aie_combine_u16(0, src_crop_w - 1);
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_Y] =
|
|
aie_combine_u16(0, src_crop_h - 1);
|
|
}
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] =
|
|
(yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] & 0xFFFFFFFE);
|
|
|
|
if (aie_cfg->en_roi) {
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_X] = aie_combine_u16(
|
|
0, aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1);
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_Y] = aie_combine_u16(
|
|
0, aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_X] =
|
|
aie_combine_u16(0, src_crop_w - 1);
|
|
yuv2rgb_cfg[Y2R_CO2_CROP_Y] =
|
|
aie_combine_u16(0, src_crop_h - 1);
|
|
}
|
|
}
|
|
|
|
stride_pym0_out_w = round_up(pym0_out_w, 8);
|
|
|
|
yuv2rgb_cfg[Y2R_OUT_X_Y_SIZE0] =
|
|
aie_combine_u16(pym0_out_w - 1, pym0_out_h - 1);
|
|
yuv2rgb_cfg[Y2R_OUT_STRIDE0_BUS_SIZE0] = aie_combine_u16(
|
|
yuv2rgb_cfg[Y2R_OUT_STRIDE0_BUS_SIZE0], stride_pym0_out_w);
|
|
yuv2rgb_cfg[Y2R_OUT_X_Y_SIZE1] =
|
|
aie_combine_u16(pym0_out_w - 1, pym0_out_h - 1);
|
|
yuv2rgb_cfg[Y2R_OUT_STRIDE1_BUS_SIZE1] = aie_combine_u16(
|
|
yuv2rgb_cfg[Y2R_OUT_STRIDE1_BUS_SIZE1], stride_pym0_out_w);
|
|
yuv2rgb_cfg[Y2R_OUT_X_Y_SIZE2] =
|
|
aie_combine_u16(pym0_out_w - 1, pym0_out_h - 1);
|
|
yuv2rgb_cfg[Y2R_OUT_STRIDE2_BUS_SIZE2] = aie_combine_u16(
|
|
yuv2rgb_cfg[Y2R_OUT_STRIDE2_BUS_SIZE2], stride_pym0_out_w);
|
|
|
|
if (aie_cfg->en_padding) {
|
|
yuv2rgb_cfg[Y2R_PADDING_EN_UP_DOWN] =
|
|
1 | ((aie_cfg->src_padding.up << 4) & 0x1FF0) |
|
|
((aie_cfg->src_padding.down << 16) & 0x01FF0000);
|
|
yuv2rgb_cfg[Y2R_PADDING_RIGHT_LEFT] =
|
|
(aie_cfg->src_padding.right & 0x01FF) |
|
|
((aie_cfg->src_padding.left << 16) & 0x01FF0000);
|
|
} else {
|
|
yuv2rgb_cfg[Y2R_PADDING_EN_UP_DOWN] = 0;
|
|
yuv2rgb_cfg[Y2R_PADDING_RIGHT_LEFT] = 0;
|
|
}
|
|
|
|
yuv2rgb_cfg[Y2R_IN_0] = srcbuf;
|
|
yuv2rgb_cfg[Y2R_IN_1] = srcbuf_UV;
|
|
|
|
|
|
//yuv2rgb_cfg[POS_Y2RCON_IN_BA_MSB] = (u32)0x00000303; //for UT
|
|
yuv2rgb_cfg[POS_Y2RCON_IN_BA_MSB] = (u32)(fd->img_msb_y | fd->img_msb_uv << 8);
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[0][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[0][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[0][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
yuv2rgb_cfg[POS_Y2RCON_OUT_BA_MSB] = (u32)(msb_bit_0 | msb_bit_1 << 8 |
|
|
msb_bit_2 << 16);//0x00030303
|
|
|
|
yuv2rgb_cfg[Y2R_OUT_0] = (u32)fd->base_para->rs_pym_rst_pa[0][0];
|
|
yuv2rgb_cfg[Y2R_OUT_1] = (u32)fd->base_para->rs_pym_rst_pa[0][1];
|
|
yuv2rgb_cfg[Y2R_OUT_2] = (u32)fd->base_para->rs_pym_rst_pa[0][2];
|
|
|
|
yuv2rgb_cfg[Y2R_X_Y_MAG] =
|
|
(xmag_0 & 0x3FFF) | ((ymag_0 << 16) & 0x3FFF0000);
|
|
|
|
if (src_crop_w >= pym0_out_w) { /* down scale AIE1.0 by FRZ */
|
|
yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] =
|
|
(yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] & 0x00100070) |
|
|
FDRZ_BIT;
|
|
yuv2rgb_cfg[Y2R_SRZ_HORI_STEP] = 0;
|
|
yuv2rgb_cfg[Y2R_SRZ_VERT_STEP] = 0;
|
|
} else { /* SRZ */
|
|
/* 0: FDRZ for down scaling */
|
|
/* 1: SRZ for up scaling */
|
|
yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] =
|
|
(yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] & 0x00100070) | SRZ_BIT;
|
|
yuv2rgb_cfg[Y2R_SRZ_HORI_STEP] =
|
|
((src_crop_w - 1) << 15) / (pym0_out_w - 1);
|
|
yuv2rgb_cfg[Y2R_SRZ_VERT_STEP] =
|
|
((src_crop_h - 1) << 15) / (pym0_out_h - 1);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int aie_config_rs(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
u32 *rs_cfg = NULL;
|
|
u32 *rs_tbl[2] = {NULL, NULL};
|
|
u16 xmag_0 = 0, ymag_0 = 0;
|
|
u16 pym_out_w[3] = {0, 0, 0};
|
|
u16 pym_out_h[3] = {0, 0, 0};
|
|
u16 round_w = 0;
|
|
u16 src_crop_w = 0;
|
|
u16 src_crop_h = 0;
|
|
int i = 0, msb_bit_0 = 0, msb_bit_1 = 0, msb_bit_2 = 0;
|
|
|
|
if (aie_cfg->sel_mode == 0) {
|
|
src_crop_w = fd->base_para->crop_width;
|
|
src_crop_h = fd->base_para->crop_height;
|
|
} else if (aie_cfg->sel_mode == 1) {
|
|
src_crop_w = fd->attr_para->crop_width[fd->attr_para->w_idx];
|
|
src_crop_h = fd->attr_para->crop_height[fd->attr_para->w_idx];
|
|
}
|
|
|
|
rs_cfg = (u32 *)fd->base_para->fd_rs_cfg_va;
|
|
|
|
pym_out_w[0] = fd->base_para->pyramid_width;
|
|
pym_out_w[1] = pym_out_w[0] >> 1;
|
|
pym_out_w[2] = pym_out_w[1] >> 1;
|
|
|
|
pym_out_h[0] = pym_out_w[0] * src_crop_h / src_crop_w;
|
|
pym_out_h[1] = pym_out_h[0] >> 1;
|
|
pym_out_h[2] = pym_out_h[1] >> 1;
|
|
|
|
for (i = 0; i < 2; i++) {
|
|
rs_tbl[i] = rs_cfg + RS_CONFIG_SIZE * i;
|
|
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[i][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[i][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[i][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
rs_tbl[i][POS_RSCON_IN_BA_MSB] = (u32)(msb_bit_0 | msb_bit_1 << 8 |
|
|
msb_bit_2 << 16); //0x00030303
|
|
|
|
rs_tbl[i][RS_IN_0] = (u32)fd->base_para->rs_pym_rst_pa[i][0];
|
|
rs_tbl[i][RS_IN_1] = (u32)fd->base_para->rs_pym_rst_pa[i][1];
|
|
rs_tbl[i][RS_IN_2] = (u32)fd->base_para->rs_pym_rst_pa[i][2];
|
|
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[i + 1][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[i + 1][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[i + 1][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
rs_tbl[i][POS_RSCON_OUT_BA_MSB] = (u32)(msb_bit_0 | msb_bit_1 << 8 |
|
|
msb_bit_2 << 16); //0x00030303
|
|
|
|
rs_tbl[i][RS_OUT_0] =
|
|
(u32)fd->base_para->rs_pym_rst_pa[i + 1][0];
|
|
rs_tbl[i][RS_OUT_1] =
|
|
(u32)fd->base_para->rs_pym_rst_pa[i + 1][1];
|
|
rs_tbl[i][RS_OUT_2] =
|
|
(u32)fd->base_para->rs_pym_rst_pa[i + 1][2];
|
|
|
|
rs_tbl[i][RS_INPUT_W_H] =
|
|
(rs_tbl[i][RS_INPUT_W_H] & 0xF800F800) |
|
|
(pym_out_h[i] & 0x7FF) |
|
|
((pym_out_w[i] << 16) & 0x7FF0000);
|
|
rs_tbl[i][RS_OUTPUT_W_H] =
|
|
(rs_tbl[i][RS_OUTPUT_W_H] & 0xF800F800) |
|
|
(pym_out_h[i + 1] & 0x7FF) |
|
|
((pym_out_w[i + 1] << 16) & 0x7FF0000);
|
|
rs_tbl[i][RS_IN_X_Y_SIZE0] =
|
|
aie_combine_u16(pym_out_w[i] - 1, pym_out_h[i] - 1);
|
|
rs_tbl[i][RS_IN_X_Y_SIZE1] =
|
|
aie_combine_u16(pym_out_w[i] - 1, pym_out_h[i] - 1);
|
|
rs_tbl[i][RS_IN_X_Y_SIZE2] =
|
|
aie_combine_u16(pym_out_w[i] - 1, pym_out_h[i] - 1);
|
|
rs_tbl[i][RS_IN_STRIDE0] =
|
|
aie_combine_u16(rs_tbl[i][RS_IN_STRIDE0], pym_out_w[i]);
|
|
rs_tbl[i][RS_IN_STRIDE1] =
|
|
aie_combine_u16(rs_tbl[i][RS_IN_STRIDE1], pym_out_w[i]);
|
|
rs_tbl[i][RS_IN_STRIDE2] =
|
|
aie_combine_u16(rs_tbl[i][RS_IN_STRIDE2], pym_out_w[i]);
|
|
rs_tbl[i][RS_OUT_X_Y_SIZE0] = aie_combine_u16(
|
|
pym_out_w[i + 1] - 1, pym_out_h[i + 1] - 1);
|
|
rs_tbl[i][RS_OUT_X_Y_SIZE1] = aie_combine_u16(
|
|
pym_out_w[i + 1] - 1, pym_out_h[i + 1] - 1);
|
|
rs_tbl[i][RS_OUT_X_Y_SIZE2] = aie_combine_u16(
|
|
pym_out_w[i + 1] - 1, pym_out_h[i + 1] - 1);
|
|
|
|
if (i == 0)
|
|
round_w = pym_out_w[i + 1];
|
|
else
|
|
round_w = round_up(pym_out_w[i + 1], 8);
|
|
|
|
rs_tbl[i][RS_OUT_STRIDE0] =
|
|
aie_combine_u16(rs_tbl[i][RS_OUT_STRIDE0], round_w);
|
|
rs_tbl[i][RS_OUT_STRIDE1] =
|
|
aie_combine_u16(rs_tbl[i][RS_OUT_STRIDE1], round_w);
|
|
rs_tbl[i][RS_OUT_STRIDE2] =
|
|
aie_combine_u16(rs_tbl[i][RS_OUT_STRIDE2], round_w);
|
|
|
|
xmag_0 = 512 * pym_out_w[i] / pym_out_w[i + 1];
|
|
ymag_0 = xmag_0;
|
|
|
|
rs_tbl[i][RS_X_Y_MAG] =
|
|
(xmag_0 & 0x3FFF) | ((ymag_0 << 16) & 0x3FFF0000);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int aie_config_network(struct mtk_aie_dev *fd,
|
|
struct aie_enq_info *aie_cfg)
|
|
{
|
|
u16 conv_width = 0;
|
|
u16 conv_height = 0;
|
|
u8 i = 0;
|
|
u8 j = 0;
|
|
u8 uch = 0;
|
|
u8 uloop = 0;
|
|
u16 fd_xsize[4] = {0, 0, 0, 0};
|
|
void *fd_cfg = NULL;
|
|
u32 *fd_cur_cfg = NULL;
|
|
u32 *fd_cur_set = NULL;
|
|
u16 pyramid0_out_w = 0;
|
|
u16 pyramid0_out_h = 0;
|
|
u16 pyramid1_out_h = 0;
|
|
u16 pyramid2_out_h = 0;
|
|
u16 input_height = 0;
|
|
u16 out_height = 0;
|
|
u16 out_ysize_plus_1 = 0;
|
|
u16 out_ysize_plus_1_stride2 = 0;
|
|
u32 src_crop_w = 0;
|
|
u32 src_crop_h = 0;
|
|
struct aie_static_info *pstv = NULL;
|
|
int msb_bit_0 = 0, msb_bit_1 = 0, msb_bit_2 = 0, msb_bit_3 = 0;
|
|
int filter = 0;
|
|
|
|
pstv = &fd->st_info;
|
|
|
|
if (aie_cfg->sel_mode == 0) {
|
|
src_crop_w = fd->base_para->crop_width;
|
|
src_crop_h = fd->base_para->crop_height;
|
|
} else if (aie_cfg->sel_mode == 1) {
|
|
src_crop_w = fd->attr_para->crop_width[fd->attr_para->w_idx];
|
|
src_crop_h = fd->attr_para->crop_height[fd->attr_para->w_idx];
|
|
}
|
|
|
|
pyramid0_out_w = fd->base_para->pyramid_width;
|
|
pyramid0_out_h = pyramid0_out_w * src_crop_h / src_crop_w;
|
|
|
|
pyramid1_out_h = pyramid0_out_h / 2;
|
|
pyramid2_out_h = pyramid1_out_h / 2;
|
|
|
|
fd_cfg = fd->base_para->fd_fd_cfg_va;
|
|
|
|
for (i = 0; i < fd_loop_num; i++) {
|
|
fd_cur_cfg = (u32 *)fd_cfg + FD_CONFIG_SIZE * i;
|
|
fd_cur_cfg[FD_INPUT_ROTATE] =
|
|
(fd_cur_cfg[FD_INPUT_ROTATE] & 0xFFFF0FFF) |
|
|
((aie_cfg->rotate_degree << 12) & 0x3000);
|
|
|
|
if (i == 0) {
|
|
input_height = pyramid2_out_h;
|
|
} else if (i == (rpn2_loop_num + 1)) {
|
|
input_height = pyramid1_out_h;
|
|
} else if (i == (rpn1_loop_num + 1)) {
|
|
input_height = pyramid0_out_h;
|
|
} else {
|
|
if (fd_out_stride2_in[i] == 0)
|
|
input_height = out_height;
|
|
else
|
|
input_height = (out_height + 1) / 2;
|
|
}
|
|
if (i == rpn0_loop_num)
|
|
fd->pose_height = input_height;
|
|
|
|
if (fd_maxpool[i] == 1 && fd_stride[i] == 1)
|
|
out_height =
|
|
DIV_ROUND_UP(input_height, 2 * fd_maxpool[i]);
|
|
else
|
|
out_height = DIV_ROUND_UP(
|
|
input_height, fd_stride[i] + 2 * fd_maxpool[i]);
|
|
|
|
if (i == rpn0_loop_num || i == rpn1_loop_num ||
|
|
i == rpn2_loop_num) {
|
|
conv_width = fd->base_para->img_width;
|
|
conv_height = fd->base_para->img_height;
|
|
fd_xsize[0] =
|
|
pstv->img_width[i] * 2 * 16 * anchor_en_num[i] -
|
|
1;
|
|
fd_xsize[1] = fd_xsize[2] = fd_xsize[3] =
|
|
pstv->img_width[i] * 2 * 32 * anchor_en_num[i] -
|
|
1;
|
|
} else {
|
|
conv_width =
|
|
DIV_ROUND_UP(pstv->img_width[i], fd_stride[i]);
|
|
conv_height = DIV_ROUND_UP(input_height, fd_stride[i]);
|
|
|
|
fd_xsize[0] = fd_xsize[1] = fd_xsize[2] = fd_xsize[3] =
|
|
pstv->input_xsize_plus_1[i] - 1;
|
|
}
|
|
|
|
fd_cur_cfg[FD_CONV_WIDTH_MOD6] =
|
|
(fd_cur_cfg[FD_CONV_WIDTH_MOD6] & 0xFF8FFFFF) |
|
|
(((conv_width % 6) << 20) & 0x00700000);
|
|
fd_cur_cfg[FD_CONV_IMG_W_H] =
|
|
aie_combine_u16(conv_height, conv_width);
|
|
|
|
fd_cur_cfg[FD_IN_IMG_W_H] =
|
|
aie_combine_u16(input_height, pstv->img_width[i]);
|
|
fd_cur_cfg[FD_OUT_IMG_W_H] =
|
|
aie_combine_u16(out_height, pstv->out_width[i]);
|
|
|
|
if (fd_rdma_en[i][0][0] != -1) {
|
|
for (j = 0; j < 4; j++) {
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE0 + 2 * j] =
|
|
aie_combine_u16(fd_xsize[j],
|
|
input_height - 1);
|
|
|
|
fd_cur_cfg[FD_IN_STRIDE0_BUS_SIZE0 + 2 * j] =
|
|
aie_combine_stride(
|
|
fd_cur_cfg
|
|
[FD_IN_STRIDE0_BUS_SIZE0 +
|
|
2 * j],
|
|
fd_xsize[j] + 1);
|
|
}
|
|
}
|
|
|
|
out_ysize_plus_1 = out_height - 1;
|
|
out_ysize_plus_1_stride2 = (out_height + 1) / 2 - 1;
|
|
|
|
for (j = 0; j < output_WDMA_WRA_num; j++) {
|
|
fd_cur_set = fd_cur_cfg + 2 * j;
|
|
if (!fd_wdma_en[i][j])
|
|
continue;
|
|
|
|
if (out_stride_size[i][j] == 1) {
|
|
fd_cur_set[FD_OUT_X_Y_SIZE0] = aie_combine_u16(
|
|
pstv->out_xsize_plus_1[i] - 1,
|
|
out_ysize_plus_1);
|
|
fd_cur_set[FD_OUT_STRIDE0_BUS_SIZE0] =
|
|
aie_combine_stride(
|
|
fd_cur_set
|
|
[FD_OUT_STRIDE0_BUS_SIZE0],
|
|
pstv->out_stride[i]);
|
|
} else if (out_stride_size[i][j] == 2) {
|
|
fd_cur_set[FD_OUT_X_Y_SIZE0] = aie_combine_u16(
|
|
pstv->out_xsize_plus_1_stride2[i] - 1,
|
|
out_ysize_plus_1_stride2);
|
|
fd_cur_set[FD_OUT_STRIDE0_BUS_SIZE0] =
|
|
aie_combine_stride(
|
|
fd_cur_set
|
|
[FD_OUT_STRIDE0_BUS_SIZE0],
|
|
pstv->out_stride_stride2[i]);
|
|
}
|
|
}
|
|
|
|
if (i == rpn0_loop_num || i == rpn1_loop_num || i == rpn2_loop_num) {
|
|
|
|
fd_cur_cfg[FD_RPN_SET] =
|
|
aie_combine_u16(fd_cur_cfg[FD_RPN_SET],
|
|
fd->base_para->rpn_anchor_thrd);
|
|
fd_cur_cfg[FD_IN_CHANNEL_PACK] = fd_cur_cfg[Y2R_SRC_DST_FORMAT] |
|
|
0x30000000;
|
|
}
|
|
|
|
if (i == rpn0_loop_num) {
|
|
fd_cur_cfg[FD_IMAGE_COORD] =
|
|
(fd_cur_cfg[FD_IMAGE_COORD] & 0xF) |
|
|
(((src_crop_w * 100 /
|
|
(int)fd->base_para->pyramid_width * 512 /
|
|
100)
|
|
<< 4) &
|
|
0x7FFF0);
|
|
fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] = 0;
|
|
if (aie_cfg->en_roi) {
|
|
fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] =
|
|
(aie_cfg->src_roi.x1 - aie_cfg->src_padding.left) |
|
|
(aie_cfg->src_roi.y1 - aie_cfg->src_padding.up) << 16;
|
|
}
|
|
} else if (i == rpn1_loop_num) {
|
|
fd_cur_cfg[FD_IMAGE_COORD] =
|
|
(fd_cur_cfg[FD_IMAGE_COORD] & 0xF) |
|
|
(((src_crop_w * 100 /
|
|
(int)fd->base_para->pyramid_width * 2 * 512 /
|
|
100)
|
|
<< 4) &
|
|
0x7FFF0);
|
|
fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] = 0;
|
|
if (aie_cfg->en_roi) {
|
|
fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] =
|
|
(aie_cfg->src_roi.x1 - aie_cfg->src_padding.left) |
|
|
(aie_cfg->src_roi.y1 - aie_cfg->src_padding.up) << 16;
|
|
}
|
|
} else if (i == rpn2_loop_num) {
|
|
fd_cur_cfg[FD_IMAGE_COORD] =
|
|
(fd_cur_cfg[FD_IMAGE_COORD] & 0xF) |
|
|
(((src_crop_w * 100 /
|
|
(int)fd->base_para->pyramid_width * 4 * 512 /
|
|
100)
|
|
<< 4) &
|
|
0x7FFF0);
|
|
fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] = 0;
|
|
if (aie_cfg->en_roi) {
|
|
fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] =
|
|
(aie_cfg->src_roi.x1 - aie_cfg->src_padding.left) |
|
|
(aie_cfg->src_roi.y1 - aie_cfg->src_padding.up) << 16;
|
|
}
|
|
}
|
|
|
|
/* IN_FM_BASE_ADR */
|
|
if (i == 0) {
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[2][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[2][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[2][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] = (u32)(msb_bit_0 |
|
|
msb_bit_1 << 8 | msb_bit_2 << 16);
|
|
fd_cur_cfg[FD_IN_0] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[2][0]);
|
|
fd_cur_cfg[FD_IN_1] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[2][1]);
|
|
fd_cur_cfg[FD_IN_2] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[2][2]);
|
|
} else if (i == (rpn2_loop_num + 1)) {
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[1][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[1][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[1][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] = (u32)(msb_bit_0 |
|
|
msb_bit_1 << 8 | msb_bit_2 << 16);
|
|
|
|
fd_cur_cfg[FD_IN_0] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[1][0]);
|
|
fd_cur_cfg[FD_IN_1] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[1][1]);
|
|
fd_cur_cfg[FD_IN_2] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[1][2]);
|
|
} else if (i == (rpn1_loop_num + 1)) {
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[0][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[0][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[0][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] = (u32)(msb_bit_0 |
|
|
msb_bit_1 << 8 | msb_bit_2 << 16);
|
|
fd_cur_cfg[FD_IN_0] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[0][0]);
|
|
fd_cur_cfg[FD_IN_1] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[0][1]);
|
|
fd_cur_cfg[FD_IN_2] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[0][2]);
|
|
} else {
|
|
for (j = 0; j < input_WDMA_WRA_num; j++) {
|
|
if (fd_rdma_en[i][j][0] != -1) {
|
|
uloop = fd_rdma_en[i][j][0];
|
|
uch = fd_rdma_en[i][j][1];
|
|
if (j == 0) {
|
|
msb_bit_0 = (fd->dma_para->fd_out_hw_pa[uloop][uch]
|
|
& 0xf00000000) >> 32;
|
|
filter = 0xfffffffc | msb_bit_0;
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB]
|
|
= (u32)(fd_cur_cfg[POS_FDCON_IN_BA_MSB] &
|
|
filter);
|
|
} else if (j == 1) {
|
|
msb_bit_1 = (fd->dma_para->fd_out_hw_pa[uloop][uch]
|
|
& 0xf00000000) >> 32;
|
|
filter = 0xfffffcff | (msb_bit_1 << 8);
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB]
|
|
= (u32)(fd_cur_cfg[POS_FDCON_IN_BA_MSB] &
|
|
filter);
|
|
} else if (j == 2) {
|
|
msb_bit_2 = (fd->dma_para->fd_out_hw_pa[uloop][uch]
|
|
& 0xf00000000) >> 32;
|
|
filter = 0xfffcffff | (msb_bit_2 << 16);
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB]
|
|
= (u32)(fd_cur_cfg[POS_FDCON_IN_BA_MSB]
|
|
& filter);
|
|
} else if (j == 3) {
|
|
msb_bit_3 = (fd->dma_para->fd_out_hw_pa[uloop][uch]
|
|
& 0xf00000000) >> 32;
|
|
filter = 0xfcffffff | (msb_bit_3 << 24);
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB]
|
|
= (u32)(fd_cur_cfg[POS_FDCON_IN_BA_MSB]
|
|
& filter);
|
|
}
|
|
fd_cur_cfg[FD_IN_0 + j] = (u32)(
|
|
fd->dma_para
|
|
->fd_out_hw_pa[uloop]
|
|
[uch]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* OUT_FM_BASE_ADR */
|
|
for (j = 0; j < output_WDMA_WRA_num; j++) {
|
|
if (fd_wdma_en[i][j]) {
|
|
if (j == 0) {
|
|
msb_bit_0 = (fd->dma_para->fd_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
filter = 0xfffffffc | msb_bit_0;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] =
|
|
(u32)(fd_cur_cfg[POS_FDCON_OUT_BA_MSB] & filter);
|
|
} else if (j == 1) {
|
|
msb_bit_1 = (fd->dma_para->fd_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
filter = 0xfffffcff | (msb_bit_1 << 8);
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] =
|
|
(u32)(fd_cur_cfg[POS_FDCON_OUT_BA_MSB] & filter);
|
|
} else if (j == 2) {
|
|
msb_bit_2 = (fd->dma_para->fd_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
filter = 0xfffcffff | (msb_bit_2 << 16);
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] =
|
|
(u32)(fd_cur_cfg[POS_FDCON_OUT_BA_MSB] & filter);
|
|
} else if (j == 3) {
|
|
msb_bit_3 = (fd->dma_para->fd_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
filter = 0xfcffffff | (msb_bit_3 << 24);
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] =
|
|
(u32)(fd_cur_cfg[POS_FDCON_OUT_BA_MSB] & filter);
|
|
}
|
|
|
|
fd_cur_cfg[FD_OUT_0 + j] =
|
|
(u32)(fd->dma_para->fd_out_hw_pa[i][j]);
|
|
}
|
|
}
|
|
|
|
/* KERNEL_BASE_ADR */
|
|
for (j = 0; j < kernel_RDMA_RA_num; j++) {
|
|
if (fd_ker_rdma_size[i][j]) {
|
|
if (j == 0) {
|
|
msb_bit_0 = (fd->dma_para->fd_kernel_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
filter = 0xfffffffc | msb_bit_0;
|
|
fd_cur_cfg[POS_FDCON_KERNEL_BA_MSB] =
|
|
(u32)(fd_cur_cfg[POS_FDCON_KERNEL_BA_MSB] & filter);
|
|
|
|
} else if (j == 1) {
|
|
msb_bit_1 = (fd->dma_para->fd_kernel_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
filter = 0xfffffcff | (msb_bit_1 << 8);
|
|
fd_cur_cfg[POS_FDCON_KERNEL_BA_MSB] =
|
|
(u32)(fd_cur_cfg[POS_FDCON_KERNEL_BA_MSB] & filter);
|
|
}
|
|
|
|
fd_cur_cfg[FD_KERNEL_0 + j] =
|
|
(u32)(fd->dma_para->fd_kernel_pa[i][j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int aie_config_attr_network(struct mtk_aie_dev *fd,
|
|
struct aie_enq_info *aie_cfg)
|
|
{
|
|
bool isRegressionLoop = false;
|
|
void *fd_cfg;
|
|
u32 *fd_cur_cfg;
|
|
u16 fd_input_ht, fd_output_ht = 0x0;
|
|
u16 fd_out_y[4];
|
|
u8 i, j;
|
|
u8 uloop, uch, uidx;
|
|
u16 pyramid0_out_w, pyramid0_out_h;
|
|
int fd_conv_ht;
|
|
u16 src_crop_w;
|
|
u16 src_crop_h;
|
|
int msb_bit_0 = 0, msb_bit_1 = 0, msb_bit_2 = 0, msb_bit_3 = 0;
|
|
|
|
src_crop_w = fd->attr_para->crop_width[fd->attr_para->w_idx];
|
|
src_crop_h = fd->attr_para->crop_height[fd->attr_para->w_idx];
|
|
|
|
pyramid0_out_w = ATTR_MODE_PYRAMID_WIDTH;
|
|
pyramid0_out_h = pyramid0_out_w * src_crop_h / src_crop_w;
|
|
|
|
fd_cfg = fd->base_para->attr_fd_cfg_va[fd->attr_para->w_idx];
|
|
|
|
for (i = 0; i < attr_loop_num; i++) {
|
|
fd_cur_cfg = (u32 *)fd_cfg + FD_CONFIG_SIZE * i;
|
|
fd_cur_cfg[FD_INPUT_ROTATE] =
|
|
(fd_cur_cfg[FD_INPUT_ROTATE] & 0xFFFF0FFF) |
|
|
((aie_cfg->rotate_degree << 12) & 0x3000);
|
|
|
|
if (i == 0) {
|
|
fd_input_ht = pyramid0_out_h;
|
|
} else {
|
|
if (attr_out_stride2_as_in[i] == 0)
|
|
fd_input_ht = fd_output_ht;
|
|
else if (attr_out_stride2_as_in[i] == 1)
|
|
fd_input_ht = (fd_output_ht + 1) / 2;
|
|
}
|
|
fd_output_ht = DIV_ROUND_UP(fd_input_ht,
|
|
attr_fd_stride[i] +
|
|
2 * attr_fd_maxpool[i]);
|
|
fd_conv_ht = DIV_ROUND_UP(fd_input_ht, attr_fd_stride[i]);
|
|
|
|
fd_cur_cfg[FD_CONV_IMG_W_H] =
|
|
(fd_cur_cfg[FD_CONV_IMG_W_H] & 0xFFFF0000) |
|
|
(fd_conv_ht & 0xFFFF);
|
|
fd_cur_cfg[FD_IN_IMG_W_H] =
|
|
(fd_cur_cfg[FD_IN_IMG_W_H] & 0xFFFF0000) |
|
|
(fd_input_ht & 0xFFFF);
|
|
fd_cur_cfg[FD_OUT_IMG_W_H] =
|
|
(fd_cur_cfg[FD_OUT_IMG_W_H] & 0xFFFF0000) |
|
|
(fd_output_ht & 0xFFFF);
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE0] = aie_combine_u16(
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE0], fd_input_ht - 1);
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE1] = aie_combine_u16(
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE1], fd_input_ht - 1);
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE2] = aie_combine_u16(
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE2], fd_input_ht - 1);
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE3] = aie_combine_u16(
|
|
fd_cur_cfg[FD_IN_X_Y_SIZE3], fd_input_ht - 1);
|
|
|
|
isRegressionLoop = (i == age_out_rgs || i == gender_out_rgs ||
|
|
i == indian_out_rgs || i == race_out_rgs);
|
|
|
|
if (isRegressionLoop) {
|
|
fd_out_y[0] = 0;
|
|
fd_out_y[1] = 0;
|
|
fd_out_y[2] = 0;
|
|
fd_out_y[3] = 0;
|
|
} else {
|
|
fd_out_y[0] = fd_output_ht - 1;
|
|
fd_out_y[1] = fd_output_ht - 1;
|
|
if (attr_out_2size[i] == 0) {
|
|
fd_out_y[2] = fd_output_ht - 1;
|
|
fd_out_y[3] = fd_output_ht - 1;
|
|
} else {
|
|
fd_out_y[2] = (fd_output_ht + 1) / 2 - 1;
|
|
fd_out_y[3] = (fd_output_ht + 1) / 2 - 1;
|
|
}
|
|
}
|
|
|
|
for (j = 0; j < 4; j++)
|
|
fd_cur_cfg[FD_OUT_X_Y_SIZE0 + 2 * j] = aie_combine_u16(
|
|
fd_cur_cfg[FD_OUT_X_Y_SIZE0 + 2 * j],
|
|
fd_out_y[j]);
|
|
|
|
/* IN_FM_BASE_ADR */
|
|
if (i == 0) {
|
|
msb_bit_0 = (fd->base_para->rs_pym_rst_pa[0][0] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_1 = (fd->base_para->rs_pym_rst_pa[0][1] &
|
|
0xf00000000) >> 32;
|
|
msb_bit_2 = (fd->base_para->rs_pym_rst_pa[0][2] &
|
|
0xf00000000) >> 32;
|
|
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] = (u32)(msb_bit_0 |
|
|
msb_bit_1 << 8 |
|
|
msb_bit_2 << 16);
|
|
fd_cur_cfg[FD_IN_0] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[0][0]);
|
|
fd_cur_cfg[FD_IN_1] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[0][1]);
|
|
fd_cur_cfg[FD_IN_2] =
|
|
(u32)(fd->base_para->rs_pym_rst_pa[0][2]);
|
|
} else {
|
|
for (j = 0; j < input_WDMA_WRA_num; j++) {
|
|
|
|
if (attr_rdma_en[i][j][0] != -1) {
|
|
uloop = attr_rdma_en[i][j][0];
|
|
uch = attr_rdma_en[i][j][1];
|
|
if (j == 0) {
|
|
msb_bit_0 =
|
|
(fd->dma_para->attr_out_hw_pa[uloop][uch] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] |= (u32)(msb_bit_0);
|
|
} else if (j == 1) {
|
|
msb_bit_1 =
|
|
(fd->dma_para->attr_out_hw_pa[uloop][uch] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] |=
|
|
(u32)(msb_bit_1 << 8);
|
|
} else if (j == 2) {
|
|
msb_bit_2 =
|
|
(fd->dma_para->attr_out_hw_pa[uloop][uch] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] |=
|
|
(u32)(msb_bit_2 << 16);
|
|
} else if (j == 3) {
|
|
msb_bit_3 =
|
|
(fd->dma_para->attr_out_hw_pa[uloop][uch] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_IN_BA_MSB] |=
|
|
(u32)(msb_bit_3 << 24);
|
|
}
|
|
|
|
fd_cur_cfg[FD_IN_0 + j] = (u32)(
|
|
fd->dma_para
|
|
->attr_out_hw_pa[uloop]
|
|
[uch]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* OUT_FM_BASE_ADR */
|
|
for (j = 0; j < output_WDMA_WRA_num; j++) {
|
|
if (attr_wdma_en[i][j]) {
|
|
uidx = fd->attr_para->w_idx;
|
|
if (i == age_out_rgs && j == 0) {
|
|
msb_bit_0 = (fd->dma_para->age_out_hw_pa[uidx] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |= (u32)msb_bit_0;
|
|
fd_cur_cfg[FD_OUT_0 + j] = (u32)(
|
|
fd->dma_para
|
|
->age_out_hw_pa[uidx]);
|
|
} else if (i == gender_out_rgs && j == 0) {
|
|
msb_bit_0 = (fd->dma_para->gender_out_hw_pa[uidx] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |= (u32)msb_bit_0;
|
|
fd_cur_cfg[FD_OUT_0 + j] = (u32)(
|
|
fd->dma_para->gender_out_hw_pa
|
|
[uidx]);
|
|
} else if (i == indian_out_rgs && j == 0) {
|
|
msb_bit_0 = (fd->dma_para->isIndian_out_hw_pa[uidx] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |= (u32)msb_bit_0;
|
|
|
|
fd_cur_cfg[FD_OUT_0 + j] = (u32)(
|
|
fd->dma_para->isIndian_out_hw_pa
|
|
[uidx]);
|
|
} else if (i == race_out_rgs && j == 0) {
|
|
msb_bit_0 = (fd->dma_para->race_out_hw_pa[uidx] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |= (u32)msb_bit_0;
|
|
fd_cur_cfg[FD_OUT_0 + j] = (u32)(
|
|
fd->dma_para
|
|
->race_out_hw_pa[uidx]);
|
|
} else {
|
|
if (j == 0) {
|
|
msb_bit_0 = (fd->dma_para->attr_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |=
|
|
(u32)(msb_bit_0);
|
|
} else if (j == 1) {
|
|
msb_bit_1 = (fd->dma_para->attr_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |=
|
|
(u32)(msb_bit_1 << 8);
|
|
} else if (j == 2) {
|
|
msb_bit_2 = (fd->dma_para->attr_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |=
|
|
(u32)(msb_bit_2 << 16);
|
|
} else if (j == 3) {
|
|
msb_bit_3 = (fd->dma_para->attr_out_hw_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_OUT_BA_MSB] |=
|
|
(u32)(msb_bit_3 << 24);
|
|
}
|
|
|
|
fd_cur_cfg[FD_OUT_0 + j] = (u32)(
|
|
fd->dma_para
|
|
->attr_out_hw_pa[i][j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* KERNEL_BASE_ADR */
|
|
for (j = 0; j < kernel_RDMA_RA_num; j++) {
|
|
if (j == 0) {
|
|
msb_bit_0 = (fd->dma_para->attr_kernel_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_KERNEL_BA_MSB] |= (u32)(msb_bit_0);
|
|
} else if (j == 1) {
|
|
msb_bit_1 = (fd->dma_para->attr_kernel_pa[i][j] &
|
|
0xf00000000) >> 32;
|
|
fd_cur_cfg[POS_FDCON_KERNEL_BA_MSB] |= (u32)(msb_bit_1 << 8);
|
|
}
|
|
fd_cur_cfg[FD_KERNEL_0 + j] =
|
|
(u32)(fd->dma_para->attr_kernel_pa[i][j]);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int aie_config_dram(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (aie_cfg->sel_mode == 0) { /* FDMODE */
|
|
ret = aie_config_y2r(fd, aie_cfg, aie_cfg->sel_mode);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = aie_config_rs(fd, aie_cfg);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = aie_config_network(fd, aie_cfg);
|
|
if (ret)
|
|
return ret;
|
|
|
|
} else if (aie_cfg->sel_mode == 1) { /* ATTRIBUTEMODE */
|
|
ret = aie_config_y2r(fd, aie_cfg, aie_cfg->sel_mode);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = aie_config_attr_network(fd, aie_cfg);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void aie_reset(struct mtk_aie_dev *fd)
|
|
{
|
|
writel(0x30000, fd->fd_base + AIE_START_REG);
|
|
writel(0x0, fd->fd_base + AIE_START_REG);
|
|
}
|
|
|
|
int aie_alloc_aie_buf(struct mtk_aie_dev *fd)
|
|
{
|
|
int ret = -ENOMEM;
|
|
int err_tag = 0;
|
|
|
|
memset(&fd->st_info, 0, sizeof(fd->st_info));
|
|
aie_init_table(fd, fd->base_para->max_pyramid_width,
|
|
fd->base_para->max_pyramid_height);
|
|
aie_get_data_size(fd, fd->base_para->max_img_width,
|
|
fd->base_para->max_img_height);
|
|
ret = aie_alloc_dram_buf(fd); //config
|
|
if (ret)
|
|
goto dram_fail;
|
|
|
|
ret = aie_alloc_output_buf(fd); //pyramid
|
|
if (ret)
|
|
goto output_fail;
|
|
|
|
ret = aie_alloc_fddma_buf(fd); //inter-production
|
|
if (ret)
|
|
goto fddma_fail;
|
|
#ifdef FLD
|
|
ret = aie_alloc_fld_buf(fd);
|
|
if (ret)
|
|
goto fld_fail;
|
|
#endif
|
|
|
|
dev_info(fd->dev,
|
|
"c(%llx/%llx/%llx)o(%llx/%llx/%llx/%llx/%llx)f(%llx/%llx/%llx/%llx/%llx/%llx/%llx)\n",
|
|
fd->rs_cfg_data.pa, fd->fd_cfg_data.pa, fd->yuv2rgb_cfg_data.pa,
|
|
fd->rs_output_hw.pa, fd->fd_dma_hw.pa, fd->fd_dma_result_hw.pa,
|
|
fd->fd_kernel_hw.pa, fd->fd_attr_dma_hw.pa, fd->fld_cv_hw.pa,
|
|
fd->fld_fp_hw.pa, fd->fld_leafnode_hw.pa, fd->fld_tree_02_hw.pa,
|
|
fd->fld_tree_13_hw.pa, fd->fld_blink_weight_hw.pa, fd->fld_output_hw.pa
|
|
);
|
|
aie_arrange_fddma_buf(fd);
|
|
aie_arrange_kernel_buf(fd);
|
|
aie_arrange_attrdma_buf(fd);
|
|
aie_arrange_result_dma_buf(fd);
|
|
#ifdef FLD
|
|
aie_arrange_fld_buf(fd);
|
|
#endif
|
|
ret = aie_load_fw(fd);
|
|
if (ret)
|
|
goto load_fw_fail;
|
|
|
|
return ret;
|
|
|
|
load_fw_fail:
|
|
aie_free_fddma_buf(fd);
|
|
err_tag++;
|
|
#ifdef FLD
|
|
fld_fail:
|
|
aie_free_fld_buf(fd);
|
|
err_tag++;
|
|
#endif
|
|
fddma_fail:
|
|
aie_free_output_buf(fd);
|
|
err_tag++;
|
|
|
|
output_fail:
|
|
aie_free_dram_buf(fd);
|
|
err_tag++;
|
|
|
|
dram_fail:
|
|
kfree(fd->dma_para);
|
|
fd->dma_para = NULL;
|
|
err_tag++;
|
|
|
|
dev_info(fd->dev, "Failed to alloc aie buf: %d\n", err_tag);
|
|
return ret;
|
|
|
|
|
|
}
|
|
|
|
int aie_init(struct mtk_aie_dev *fd)
|
|
{
|
|
int err_tag = 0;
|
|
|
|
fd->fd_state = STATE_NA;
|
|
|
|
writel(0x00400020, fd->fd_base + FDVT_RDA_0_CON3_REG);
|
|
writel(0x00400020, fd->fd_base + FDVT_RDA_1_CON3_REG);
|
|
|
|
writel(0x00400020, fd->fd_base + FDVT_RDB_0_CON3_REG);
|
|
writel(0x00400020, fd->fd_base + FDVT_RDB_1_CON3_REG);
|
|
|
|
writel(0x00400020, fd->fd_base + FDVT_WRA_0_CON3_REG);
|
|
writel(0x00400020, fd->fd_base + FDVT_WRA_1_CON3_REG);
|
|
|
|
writel(0x00400020, fd->fd_base + FDVT_WRB_0_CON3_REG);
|
|
writel(0x00400020, fd->fd_base + FDVT_WRB_0_CON3_REG);
|
|
|
|
#if CHECK_SERVICE_IF_0
|
|
mtk_iommu_register_fault_callback(M4U_PORT_L12_IPE_FDVT_2ND_RDA0,
|
|
(mtk_iommu_fault_callback_t)FDVT_M4U_TranslationFault_callback,
|
|
NULL, false);
|
|
mtk_iommu_register_fault_callback(M4U_PORT_L12_IPE_FDVT_2ND_RDB0,
|
|
(mtk_iommu_fault_callback_t)FDVT_M4U_TranslationFault_callback,
|
|
NULL, false);
|
|
mtk_iommu_register_fault_callback(M4U_PORT_L12_IPE_FDVT_2ND_WRA0,
|
|
(mtk_iommu_fault_callback_t)FDVT_M4U_TranslationFault_callback,
|
|
NULL, false);
|
|
mtk_iommu_register_fault_callback(M4U_PORT_L12_IPE_FDVT_2ND_WRB0,
|
|
(mtk_iommu_fault_callback_t)FDVT_M4U_TranslationFault_callback,
|
|
NULL, false);
|
|
#endif
|
|
fd->base_para = kmalloc(sizeof(struct aie_para), GFP_KERNEL);
|
|
if (fd->base_para == NULL)
|
|
return -ENOMEM;
|
|
|
|
fd->attr_para = kmalloc(sizeof(struct aie_attr_para), GFP_KERNEL);
|
|
if (fd->attr_para == NULL)
|
|
goto attr_para_fail;
|
|
#ifdef FLD
|
|
fd->fld_para = kmalloc(sizeof(struct aie_fld_para), GFP_KERNEL);
|
|
if (fd->fld_para == NULL)
|
|
goto fld_para_fail;
|
|
#endif
|
|
fd->dma_para = kmalloc(sizeof(struct aie_fd_dma_para), GFP_KERNEL);
|
|
if (fd->dma_para == NULL)
|
|
goto dma_para_fail;
|
|
|
|
fd->attr_para->r_idx = 0;
|
|
fd->attr_para->w_idx = 0;
|
|
|
|
fd->fd_state = STATE_INIT;
|
|
|
|
return 0;
|
|
|
|
dma_para_fail:
|
|
kfree(fd->attr_para);
|
|
fd->attr_para = NULL;
|
|
err_tag++;
|
|
#ifdef FLD
|
|
fld_para_fail:
|
|
kfree(fd->fld_para);
|
|
fd->fld_para = NULL;
|
|
err_tag++;
|
|
#endif
|
|
attr_para_fail:
|
|
kfree(fd->base_para);
|
|
fd->base_para = NULL;
|
|
err_tag++;
|
|
|
|
dev_info(fd->dev, "Failed to init aie: %d\n", err_tag);
|
|
|
|
return -ENOMEM;
|
|
}
|
|
|
|
void aie_uninit(struct mtk_aie_dev *fd)
|
|
{
|
|
fd->fd_state = STATE_NA;
|
|
|
|
aie_free_dram_buf(fd);
|
|
aie_free_fddma_buf(fd);
|
|
#ifdef FLD
|
|
aie_free_fld_buf(fd);
|
|
#endif
|
|
if (g_user_param.is_secure)
|
|
aie_free_sec_buf(fd);
|
|
else
|
|
aie_free_output_buf(fd);
|
|
|
|
if (fd->base_para != NULL) {
|
|
kfree(fd->base_para);
|
|
fd->base_para = NULL;
|
|
}
|
|
if (fd->attr_para != NULL) {
|
|
kfree(fd->attr_para);
|
|
fd->attr_para = NULL;
|
|
}
|
|
if (fd->dma_para != NULL) {
|
|
kfree(fd->dma_para);
|
|
fd->dma_para = NULL;
|
|
}
|
|
#ifdef FLD
|
|
if (fd->fld_para != NULL) {
|
|
kfree(fd->fld_para);
|
|
fd->fld_para = NULL;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
int aie_prepare(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (fd->fd_state != STATE_INIT) {
|
|
dev_info(fd->dev, "%s fd state fail: %d\n",
|
|
__func__, fd->fd_state);
|
|
return -EINVAL;
|
|
}
|
|
|
|
memset(&fd->reg_cfg, 0, sizeof(fd->reg_cfg));
|
|
|
|
if (aie_cfg->pyramid_base_width == 0) {
|
|
fd->base_para->pyramid_width =
|
|
fd->base_para->max_pyramid_width;
|
|
fd->base_para->pyramid_height =
|
|
fd->base_para->max_pyramid_height;
|
|
fd->base_para->number_of_pyramid = 3;
|
|
} else {
|
|
if (aie_cfg->pyramid_base_width >
|
|
fd->base_para->max_pyramid_width ||
|
|
aie_cfg->pyramid_base_height >
|
|
fd->base_para->max_pyramid_height ||
|
|
aie_cfg->number_of_pyramid > 3 ||
|
|
aie_cfg->number_of_pyramid <= 0) {
|
|
dev_info(fd->dev, "err: base w: %d, h: %d, num: %d\n",
|
|
aie_cfg->pyramid_base_width,
|
|
aie_cfg->pyramid_base_height,
|
|
aie_cfg->number_of_pyramid);
|
|
dev_info(fd->dev, "err: max w: %d, h: %d\n",
|
|
fd->base_para->max_pyramid_width,
|
|
fd->base_para->max_pyramid_height);
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
fd->base_para->pyramid_height =
|
|
fd->base_para->max_pyramid_height;
|
|
fd->base_para->number_of_pyramid =
|
|
aie_cfg->number_of_pyramid;
|
|
if (aie_cfg->pyramid_base_width !=
|
|
fd->base_para->pyramid_width) {
|
|
dev_dbg(fd->dev, "pre: %d, cur: %d, num: %d\n",
|
|
fd->base_para->pyramid_width,
|
|
aie_cfg->pyramid_base_width,
|
|
fd->base_para->number_of_pyramid);
|
|
fd->base_para->pyramid_width =
|
|
aie_cfg->pyramid_base_width;
|
|
aie_update_table(
|
|
fd, fd->base_para->pyramid_width,
|
|
fd->base_para->pyramid_height);
|
|
aie_update_fddma_buf(fd);
|
|
}
|
|
}
|
|
|
|
if ((aie_cfg->src_img_width > fd->base_para->max_img_width) ||
|
|
(aie_cfg->src_img_height > fd->base_para->max_img_height)) {
|
|
dev_info(
|
|
fd->dev,
|
|
"AIE error: Enque Size error, Src_WD: %d, Src_HT: %d\n",
|
|
aie_cfg->src_img_width, aie_cfg->src_img_height);
|
|
|
|
dev_info(fd->dev, "AIE error: MAX_Src_WD: %d, MAX_Src_HT: %d\n",
|
|
fd->base_para->max_img_width,
|
|
fd->base_para->max_img_height);
|
|
return -EINVAL;
|
|
}
|
|
|
|
//aie_reset_output_buf(fd, aie_cfg);
|
|
|
|
fd->reg_cfg.fd_mode = aie_cfg->sel_mode;
|
|
if (aie_cfg->sel_mode == 0) { /* FDMODE */
|
|
fd->reg_cfg.rs_adr = (u32)fd->base_para->fd_rs_cfg_pa;
|
|
fd->reg_cfg.yuv2rgb_adr = (u32)fd->base_para->fd_yuv2rgb_cfg_pa;
|
|
fd->reg_cfg.fd_adr = (u32)fd->base_para->fd_fd_cfg_pa +
|
|
FD_CONFIG_SIZE * 4 * fd_loop_num /
|
|
3 * (3 - aie_cfg->number_of_pyramid);
|
|
|
|
} else if (aie_cfg->sel_mode == 1) { /* ATTRMODE */
|
|
fd->reg_cfg.yuv2rgb_adr =
|
|
(u32)fd->base_para
|
|
->attr_yuv2rgb_cfg_pa[fd->attr_para->w_idx];
|
|
fd->reg_cfg.fd_adr =
|
|
(u32)fd->base_para
|
|
->attr_fd_cfg_pa[fd->attr_para->w_idx];
|
|
} else {
|
|
dev_info(fd->dev, "AIE error, Mode: %d", aie_cfg->sel_mode);
|
|
return -EINVAL;
|
|
}
|
|
|
|
ret = aie_update_cfg(fd, aie_cfg);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = aie_config_dram(fd, aie_cfg);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (aie_cfg->sel_mode == 1) { /* ATTRMODE */
|
|
fd->attr_para->w_idx =
|
|
(fd->attr_para->w_idx + 1) % MAX_ENQUE_FRAME_NUM;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
#ifdef FDVT_USE_GCE
|
|
static void AIECmdqCB(struct cmdq_cb_data data)
|
|
{
|
|
struct mtk_aie_dev *fd = (struct mtk_aie_dev *)data.data;
|
|
|
|
queue_work(fd->frame_done_wq, &fd->req_work.work);
|
|
}
|
|
|
|
static void AIECmdqSecCB(struct cmdq_cb_data data)
|
|
{
|
|
struct mtk_aie_dev *fd = (struct mtk_aie_dev *)data.data;
|
|
|
|
dev_info(fd->dev, "AIE SEC CMDQ CB\n");
|
|
}
|
|
|
|
static void AieSecPktCB(struct cmdq_cb_data data)
|
|
{
|
|
struct cmdq_pkt *sec_pkt = (struct cmdq_pkt *)data.data;
|
|
|
|
cmdq_pkt_destroy(sec_pkt);
|
|
g_sec_pkt = NULL;
|
|
|
|
}
|
|
|
|
void config_aie_cmdq_secure_init(struct mtk_aie_dev *fd)
|
|
{
|
|
g_sec_pkt = cmdq_pkt_create(fd->fdvt_secure_clt);
|
|
|
|
cmdq_sec_pkt_set_data(g_sec_pkt, 0, 0, CMDQ_SEC_DEBUG, CMDQ_METAEX_TZMP);
|
|
cmdq_sec_pkt_set_mtee(g_sec_pkt, true);
|
|
cmdq_pkt_finalize_loop(g_sec_pkt);
|
|
cmdq_pkt_flush_threaded(g_sec_pkt, AieSecPktCB, (void *)g_sec_pkt);
|
|
}
|
|
|
|
void aie_enable_secure_domain(struct mtk_aie_dev *fd)
|
|
{
|
|
struct cmdq_pkt *pkt = NULL;
|
|
|
|
pkt = cmdq_pkt_create(fd->fdvt_clt);
|
|
cmdq_pkt_set_event(pkt, fd->fdvt_sec_wait);
|
|
cmdq_pkt_wfe(pkt, fd->fdvt_sec_set);
|
|
cmdq_pkt_flush_async(pkt, AIECmdqSecCB, (void *)fd); /* flush and destry in cmdq*/
|
|
cmdq_pkt_wait_complete(pkt);
|
|
cmdq_pkt_destroy(pkt);
|
|
}
|
|
|
|
void aie_disable_secure_domain(struct mtk_aie_dev *fd)
|
|
{
|
|
struct cmdq_pkt *pkt = NULL;
|
|
|
|
pkt = cmdq_pkt_create(fd->fdvt_clt);
|
|
cmdq_pkt_set_event(pkt, fd->fdvt_sec_wait);
|
|
cmdq_pkt_wfe(pkt, fd->fdvt_sec_set);
|
|
cmdq_pkt_flush_async(pkt, AIECmdqSecCB, (void *)fd);/* flush and destry in cmdq*/
|
|
cmdq_pkt_wait_complete(pkt);
|
|
cmdq_pkt_destroy(pkt);
|
|
}
|
|
|
|
void config_aie_cmdq_hw(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
struct cmdq_pkt *pkt = NULL;
|
|
unsigned int loop_num = 0;
|
|
unsigned int loop_reg_val = 0;
|
|
|
|
pkt = cmdq_pkt_create(fd->fdvt_clt);
|
|
/*for early porting*/
|
|
if (aie_cfg->sel_mode == 0) {
|
|
cmdq_pkt_write(pkt, NULL, FDVT_ENABLE_HW, 0x00000111,
|
|
CMDQ_REG_MASK);
|
|
loop_num = fd_loop_num / 3 * (aie_cfg->number_of_pyramid);
|
|
loop_reg_val = (loop_num << 8) |
|
|
(aie_cfg->number_of_pyramid - 1);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_LOOP_HW, loop_reg_val, CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_write(pkt, NULL, FDVT_INT_EN_HW, 0x1, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_RS_CON_BASE_ADR_HW,
|
|
fd->reg_cfg.rs_adr, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_FD_CON_BASE_ADR_HW,
|
|
fd->reg_cfg.fd_adr, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_YUV2RGB_CON_BASE_ADR_HW,
|
|
fd->reg_cfg.yuv2rgb_adr, CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_write(pkt, NULL, FDVT_START_HW, 0x1, CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_wfe(pkt, fd->fdvt_event_id);
|
|
/*cmdqRecWait(handle, CMDQ_EVENT_IPE_EVENT_TX_FRAME_DONE_0);*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_START_HW, 0x0, CMDQ_REG_MASK);
|
|
|
|
} else if (aie_cfg->sel_mode == 1) {
|
|
cmdq_pkt_write(pkt, NULL, FDVT_ENABLE_HW, 0x00000101,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_LOOP_HW, 0x00001A00,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_INT_EN_HW, 0x1, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_RS_CON_BASE_ADR_HW,
|
|
fd->reg_cfg.rs_adr,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_FD_CON_BASE_ADR_HW,
|
|
fd->reg_cfg.fd_adr,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_YUV2RGB_CON_BASE_ADR_HW,
|
|
fd->reg_cfg.yuv2rgb_adr,
|
|
CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_write(pkt, NULL, FDVT_START_HW, 0x1, CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_wfe(pkt, fd->fdvt_event_id);
|
|
/*cmdqRecWait(handle, CMDQ_EVENT_IPE_EVENT_TX_FRAME_DONE_0);*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_START_HW, 0x0, CMDQ_REG_MASK);
|
|
|
|
} else if (aie_cfg->sel_mode == 3) {
|
|
int i = 0;
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + AIE_START_REG, 0x10, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_DMA_CTL_HW, 0x00011111, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_EN, 0x01111111, CMDQ_REG_MASK);
|
|
|
|
for (i = 0; i < aie_cfg->fld_face_num; i++) {
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_BASE_ADDR_FACE_0 + i * 0x4,
|
|
aie_cfg->src_img_addr, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_face_info_0[i],
|
|
(aie_cfg->fld_input[i].fld_in_crop.x1 << 16) |
|
|
aie_cfg->fld_input[i].fld_in_crop.y1, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_face_info_1[i],
|
|
(aie_cfg->fld_input[i].fld_in_crop.x2 << 16) |
|
|
aie_cfg->fld_input[i].fld_in_crop.y2, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_face_info_2[i],
|
|
(aie_cfg->fld_input[i].fld_in_rip << 4) |
|
|
aie_cfg->fld_input[i].fld_in_rop, CMDQ_REG_MASK);
|
|
}
|
|
|
|
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_MODEL_PARA1,
|
|
(fld_forest << 16) | (aie_cfg->fld_face_num << 28) | fld_point,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_MODEL_PARA14,
|
|
(0xd << 16) | 0xfe9, CMDQ_REG_MASK);
|
|
|
|
/*fld kernel model pa setting*/
|
|
for (i = 0; i < FLD_MAX_INPUT; i++) {
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_pl_in_addr_0[i],
|
|
fd->dma_para->fld_tree02_pa[i], CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_pl_in_addr_1[i],
|
|
fd->dma_para->fld_tree13_pa[i], CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_pl_in_addr_2[i],
|
|
fd->dma_para->fld_cv_pa[i], CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_pl_in_addr_3[i],
|
|
fd->dma_para->fld_fp_pa[i], CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + fld_sh_in_addr[i],
|
|
fd->dma_para->fld_leafnode_pa[i], CMDQ_REG_MASK);
|
|
}
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_BS_IN_BASE_ADDR_14,
|
|
fd->dma_para->fld_blink_weight_pa, CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_SRC_WD_HT,
|
|
(aie_cfg->src_img_width << 16) | aie_cfg->src_img_height,
|
|
CMDQ_REG_MASK);
|
|
|
|
/*input settings*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_SIZE_0,
|
|
0x007c003f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_STRIDE_0,
|
|
0x0040000f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_SIZE_1,
|
|
0x007c003f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_STRIDE_1,
|
|
0x0040000f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_SIZE_2_0,
|
|
0x0016003f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_STRIDE_2_0,
|
|
0x0040000f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + + FLD_PL_IN_SIZE_2_1,
|
|
0x0013003f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + + FLD_PL_IN_STRIDE_2_1,
|
|
0x0040000f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_SIZE_2_2,
|
|
0x0013003f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_STRIDE_2_2,
|
|
0x0040000f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_SIZE_3,
|
|
0x00a6001f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PL_IN_STRIDE_3,
|
|
0x0020000f, CMDQ_REG_MASK);
|
|
|
|
/*output setting*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_SH_IN_SIZE_0,
|
|
((2400 * aie_cfg->fld_face_num - 1) << 16) | 127,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_SH_IN_STRIDE_0, 0x0010000f,
|
|
CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_TR_OUT_BASE_ADDR_0,
|
|
fd->dma_para->fld_output_pa, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_TR_OUT_SIZE_0,
|
|
((aie_cfg->fld_face_num-1) << 16) | 0x6f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_TR_OUT_STRIDE_0,
|
|
0x0070000f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PP_OUT_BASE_ADDR_0,
|
|
fd->dma_para->fld_output_pa, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PP_OUT_SIZE_0,
|
|
((aie_cfg->fld_face_num-1) << 16) | 0x6f, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_PP_OUT_STRIDE_0,
|
|
0x0070000f, CMDQ_REG_MASK);
|
|
|
|
/*cv score*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_BS_BIAS, 0x00000001, CMDQ_REG_MASK);
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_CV_FM_RANGE_0,
|
|
0x0000b835, CMDQ_REG_MASK); //8E8
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_CV_FM_RANGE_1,
|
|
0xffff5cba, CMDQ_REG_MASK); //8EC
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_CV_PM_RANGE_0,
|
|
0x00005ed5, CMDQ_REG_MASK); //8F0
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_CV_PM_RANGE_1,
|
|
0xffff910d, CMDQ_REG_MASK); //8F4 //TEMP
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_BS_RANGE_0,
|
|
0x0000031e, CMDQ_REG_MASK); //8F8
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + FLD_BS_RANGE_1,
|
|
0xfffffcae, CMDQ_REG_MASK); //8FC
|
|
|
|
/*fld mode + trigger start*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_BASE_HW + AIE_START_REG, 0x11, CMDQ_REG_MASK);
|
|
|
|
cmdq_pkt_wfe(pkt, fd->fdvt_event_id);
|
|
/*cmdqRecWait(handle, CMDQ_EVENT_IPE_EVENT_TX_FRAME_DONE_0);*/
|
|
cmdq_pkt_write(pkt, NULL, FDVT_START_HW, 0x0, CMDQ_REG_MASK);
|
|
}
|
|
|
|
//cmdq_pkt_flush(pkt);
|
|
cmdq_pkt_flush_async(pkt, AIECmdqCB, (void *)fd); /* flush and destry in cmdq*/
|
|
cmdq_pkt_wait_complete(pkt);
|
|
/* release resource */
|
|
cmdq_pkt_destroy(pkt);
|
|
}
|
|
#endif
|
|
void aie_execute(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
#ifndef FDVT_USE_GCE
|
|
|
|
unsigned int loop_num = 0;
|
|
unsigned int loop_reg_val = 0;
|
|
|
|
if (aie_cfg->sel_mode == 0) {
|
|
writel(0x00000111, fd->fd_base + AIE_ENABLE_REG);
|
|
loop_num = fd_loop_num / 3 * (aie_cfg->number_of_pyramid);
|
|
loop_reg_val = (loop_num << 8) |
|
|
(aie_cfg->number_of_pyramid - 1);
|
|
writel(loop_reg_val, fd->fd_base + AIE_LOOP_REG);
|
|
writel(0x1, fd->fd_base + AIE_INT_EN_REG);
|
|
writel(fd->reg_cfg.rs_adr,
|
|
fd->fd_base + AIE_RS_CON_BASE_ADR_REG);
|
|
writel(fd->reg_cfg.fd_adr,
|
|
fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
|
|
writel(fd->reg_cfg.yuv2rgb_adr,
|
|
fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_REG);
|
|
writel(0x1, fd->fd_base + AIE_START_REG);
|
|
} else if (aie_cfg->sel_mode == 1) {
|
|
writel(0x00000101, fd->fd_base + AIE_ENABLE_REG);
|
|
writel(0x00001A00, fd->fd_base + AIE_LOOP_REG);
|
|
writel(0x1, fd->fd_base + AIE_INT_EN_REG);
|
|
writel(fd->reg_cfg.rs_adr,
|
|
fd->fd_base + AIE_RS_CON_BASE_ADR_REG);
|
|
writel(fd->reg_cfg.fd_adr,
|
|
fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
|
|
writel(fd->reg_cfg.yuv2rgb_adr,
|
|
fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_REG);
|
|
writel(0x1, fd->fd_base + AIE_START_REG);
|
|
} else if (aie_cfg->sel_mode == 3) {
|
|
int i = 0;
|
|
|
|
writel(0x10, fd->fd_base + AIE_START_REG);
|
|
writel(0x00011111, fd->fd_base + AIE_DMA_CTL_REG);
|
|
writel(0x01111111, fd->fd_base + FLD_EN);
|
|
for (i = 0; i < aie_cfg->fld_face_num; i++) {
|
|
writel(aie_cfg->src_img_addr, fd->fd_base + FLD_BASE_ADDR_FACE_0 + i * 0x4);
|
|
writel((aie_cfg->fld_input[i].fld_in_crop.x1 << 16) |
|
|
aie_cfg->fld_input[i].fld_in_crop.y1,
|
|
fd->fd_base + fld_face_info_0[i]);
|
|
writel((aie_cfg->fld_input[i].fld_in_crop.x2 << 16) |
|
|
aie_cfg->fld_input[i].fld_in_crop.y2,
|
|
fd->fd_base + fld_face_info_1[i]);
|
|
writel(aie_cfg->fld_input[i].fld_in_rip << 4 |
|
|
aie_cfg->fld_input[i].fld_in_rop,
|
|
fd->fd_base + fld_face_info_2[i]);
|
|
}
|
|
|
|
|
|
writel((fld_forest << 16) | (aie_cfg->fld_face_num << 28) | fld_point,
|
|
fd->fd_base + FLD_MODEL_PARA1);
|
|
writel((0xd << 16) | 0xfe9, fd->fd_base + FLD_MODEL_PARA14);
|
|
|
|
/*fld kernel model pa setting*/
|
|
for (i = 0; i < FLD_MAX_INPUT; i++) {
|
|
writel(fd->dma_para->fld_tree02_pa[i], fd->fd_base + fld_pl_in_addr_0[i]);
|
|
writel(fd->dma_para->fld_tree13_pa[i], fd->fd_base + fld_pl_in_addr_1[i]);
|
|
writel(fd->dma_para->fld_cv_pa[i], fd->fd_base + fld_pl_in_addr_2[i]);
|
|
writel(fd->dma_para->fld_fp_pa[i], fd->fd_base + fld_pl_in_addr_3[i]);
|
|
writel(fd->dma_para->fld_leafnode_pa[i], fd->fd_base + fld_sh_in_addr[i]);
|
|
}
|
|
writel(fd->dma_para->fld_blink_weight_pa, fd->fd_base + FLD_BS_IN_BASE_ADDR_14);
|
|
|
|
writel((aie_cfg->src_img_width << 16) |
|
|
aie_cfg->src_img_height, fd->fd_base + FLD_SRC_WD_HT);
|
|
|
|
/*input settings*/
|
|
writel(0x007c003f, fd->fd_base + FLD_PL_IN_SIZE_0);
|
|
writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_0);
|
|
writel(0x007c003f, fd->fd_base + FLD_PL_IN_SIZE_1);
|
|
writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_1);
|
|
writel(0x0016003f, fd->fd_base + FLD_PL_IN_SIZE_2_0);
|
|
writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_0);
|
|
writel(0x0013003f, fd->fd_base + FLD_PL_IN_SIZE_2_1);
|
|
writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_1);
|
|
writel(0x0013003f, fd->fd_base + FLD_PL_IN_SIZE_2_2);
|
|
writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_2);
|
|
writel(0x00a6001f, fd->fd_base + FLD_PL_IN_SIZE_3);
|
|
writel(0x0020000f, fd->fd_base + FLD_PL_IN_STRIDE_3);
|
|
|
|
/*output setting*/
|
|
writel((2400 * aie_cfg->fld_face_num - 1) << 16 | 127,
|
|
fd->fd_base + FLD_SH_IN_SIZE_0);
|
|
writel(0x0010000f, fd->fd_base + FLD_SH_IN_STRIDE_0);
|
|
writel(fd->dma_para->fld_output_pa, fd->fd_base + FLD_TR_OUT_BASE_ADDR_0);
|
|
writel((aie_cfg->fld_face_num - 1) << 16 | 0x6f, fd->fd_base + FLD_TR_OUT_SIZE_0);
|
|
writel(0x0070000f, fd->fd_base + FLD_TR_OUT_STRIDE_0);
|
|
writel(fd->dma_para->fld_output_pa, fd->fd_base + FLD_PP_OUT_BASE_ADDR_0);
|
|
writel((aie_cfg->fld_face_num - 1) << 16 | 0x6f, fd->fd_base + FLD_PP_OUT_SIZE_0);
|
|
writel(0x0070000f, fd->fd_base + FLD_PP_OUT_STRIDE_0);
|
|
|
|
/*cv score*/
|
|
writel(0x00000001, fd->fd_base + FLD_BS_BIAS);
|
|
writel(0x0000b835, fd->fd_base + FLD_CV_FM_RANGE_0); //8E8
|
|
writel(0xffff5cba, fd->fd_base + FLD_CV_FM_RANGE_1); //8EC
|
|
writel(0x00005ed5, fd->fd_base + FLD_CV_PM_RANGE_0); //8F0
|
|
writel(0xffff910d, fd->fd_base + FLD_CV_PM_RANGE_1); //8F4 //temp 310
|
|
writel(0x0000031e, fd->fd_base + FLD_BS_RANGE_0); //8F8
|
|
writel(0xfffffcae, fd->fd_base + FLD_BS_RANGE_1); //8FC
|
|
|
|
/*fld mode + trigger start*/
|
|
writel(0x11, fd->fd_base + AIE_START_REG);
|
|
}
|
|
#else
|
|
config_aie_cmdq_hw(fd, aie_cfg);
|
|
#endif
|
|
|
|
}
|
|
|
|
void aie_execute_pose(struct mtk_aie_dev *fd)
|
|
{
|
|
writel(0x00000100, fd->fd_base + AIE_ENABLE_REG);
|
|
writel(0x00000300, fd->fd_base + AIE_LOOP_REG);
|
|
writel(0x1, fd->fd_base + AIE_INT_EN_REG);
|
|
writel(fd->reg_cfg.fd_pose_adr, fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
|
|
writel(0x1, fd->fd_base + AIE_START_REG);
|
|
}
|
|
|
|
void aie_irqhandle(struct mtk_aie_dev *fd)
|
|
{
|
|
int status;
|
|
|
|
writel(0x0, fd->fd_base + AIE_START_REG);
|
|
|
|
/* interrupt read clear */
|
|
status = readl(fd->fd_base + AIE_INT_REG);
|
|
}
|
|
|
|
/* return aie_cfg to user space */
|
|
void aie_get_fd_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
void *fd_pym_result[PYM_NUM];
|
|
u32 fd_result_hw, fd_result_1_hw;
|
|
u32 fd_total_num;
|
|
u32 fd_pyramid_num[PYM_NUM];
|
|
|
|
aie_cfg->sel_mode = fd->base_para->sel_mode;
|
|
aie_cfg->rotate_degree = fd->base_para->rotate_degree;
|
|
aie_cfg->src_img_addr = fd->base_para->src_img_addr;
|
|
aie_cfg->src_img_addr_uv = fd->base_para->src_img_addr_uv;
|
|
aie_cfg->src_img_width = fd->base_para->img_width;
|
|
aie_cfg->src_img_height = fd->base_para->img_height;
|
|
aie_cfg->src_img_fmt = fd->base_para->src_img_fmt;
|
|
aie_cfg->fd_version = FD_VERSION;
|
|
aie_cfg->attr_version = ATTR_VERSION;
|
|
|
|
fd_pym_result[0] = fd->dma_para->fd_out_hw_va[rpn0_loop_num][0];
|
|
fd_pym_result[1] = fd->dma_para->fd_out_hw_va[rpn1_loop_num][0];
|
|
fd_pym_result[2] = fd->dma_para->fd_out_hw_va[rpn2_loop_num][0];
|
|
|
|
fd_result_hw = fd->reg_cfg.hw_result;
|
|
fd_result_1_hw = fd->reg_cfg.hw_result1;
|
|
fd_total_num = fd_result_hw & 0xFFF;
|
|
fd_pyramid_num[0] = (fd_result_hw & 0xFFF0000) >> 16;
|
|
fd_pyramid_num[1] = fd_result_1_hw & 0xFFF;
|
|
fd_pyramid_num[2] = (fd_result_1_hw & 0xFFF0000) >> 16;
|
|
|
|
aie_cfg->fd_out.fd_total_num = fd_total_num;
|
|
aie_cfg->fd_out.fd_pyramid0_num = fd_pyramid_num[0];
|
|
aie_cfg->fd_out.fd_pyramid1_num = fd_pyramid_num[1];
|
|
aie_cfg->fd_out.fd_pyramid2_num = fd_pyramid_num[2];
|
|
|
|
memcpy(aie_cfg->fd_out.rpn31_rlt,
|
|
fd->dma_para->fd_out_hw_va[rpn2_loop_num][0],
|
|
sizeof(aie_cfg->fd_out.rpn31_rlt));
|
|
memcpy(aie_cfg->fd_out.rpn63_rlt,
|
|
fd->dma_para->fd_out_hw_va[rpn1_loop_num][0],
|
|
sizeof(aie_cfg->fd_out.rpn63_rlt));
|
|
memcpy(aie_cfg->fd_out.rpn95_rlt,
|
|
fd->dma_para->fd_out_hw_va[rpn0_loop_num][0],
|
|
sizeof(aie_cfg->fd_out.rpn95_rlt));
|
|
}
|
|
|
|
void aie_get_attr_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
u32 *attr_race_result, *attr_gender_result;
|
|
u32 *attr_age_result, *attr_isIndian_result;
|
|
|
|
aie_cfg->sel_mode = fd->attr_para->sel_mode[fd->attr_para->r_idx];
|
|
aie_cfg->rotate_degree =
|
|
fd->attr_para->rotate_degree[fd->attr_para->r_idx];
|
|
aie_cfg->src_img_addr =
|
|
fd->attr_para->src_img_addr[fd->attr_para->r_idx];
|
|
aie_cfg->src_img_addr_uv =
|
|
fd->attr_para->src_img_addr_uv[fd->attr_para->r_idx];
|
|
aie_cfg->src_img_width = fd->attr_para->img_width[fd->attr_para->r_idx];
|
|
aie_cfg->src_img_height =
|
|
fd->attr_para->img_height[fd->attr_para->r_idx];
|
|
aie_cfg->src_img_fmt = fd->attr_para->src_img_fmt[fd->attr_para->r_idx];
|
|
aie_cfg->fd_version = FD_VERSION;
|
|
aie_cfg->attr_version = ATTR_VERSION;
|
|
|
|
/* 64 feature * 32 bytes */
|
|
attr_age_result =
|
|
(u32 *)fd->dma_para->age_out_hw_va[fd->attr_para->r_idx];
|
|
attr_gender_result =
|
|
(u32 *)fd->dma_para->gender_out_hw_va[fd->attr_para->r_idx];
|
|
attr_isIndian_result =
|
|
(u32 *)fd->dma_para->isIndian_out_hw_va[fd->attr_para->r_idx];
|
|
attr_race_result =
|
|
(u32 *)fd->dma_para->race_out_hw_va[fd->attr_para->r_idx];
|
|
|
|
memcpy(aie_cfg->attr_out.rpn17_rlt, attr_age_result,
|
|
sizeof(aie_cfg->attr_out.rpn17_rlt));
|
|
memcpy(aie_cfg->attr_out.rpn20_rlt, attr_gender_result,
|
|
sizeof(aie_cfg->attr_out.rpn20_rlt));
|
|
memcpy(aie_cfg->attr_out.rpn22_rlt, attr_isIndian_result,
|
|
sizeof(aie_cfg->attr_out.rpn22_rlt));
|
|
memcpy(aie_cfg->attr_out.rpn25_rlt, attr_race_result,
|
|
sizeof(aie_cfg->attr_out.rpn25_rlt));
|
|
|
|
fd->attr_para->r_idx = (fd->attr_para->r_idx + 1) % MAX_ENQUE_FRAME_NUM;
|
|
}
|
|
|
|
|
|
void aie_get_fld_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
|
|
{
|
|
aie_cfg->sel_mode = fd->fld_para->sel_mode;
|
|
aie_cfg->src_img_width = fd->fld_para->img_width;
|
|
aie_cfg->src_img_height = fd->fld_para->img_height;
|
|
aie_cfg->fd_version = FD_VERSION;
|
|
aie_cfg->attr_version = ATTR_VERSION;
|
|
aie_cfg->src_img_addr = fd->fld_para->src_img_addr;
|
|
aie_cfg->fld_face_num = fd->fld_para->face_num;
|
|
|
|
memcpy(aie_cfg->fld_raw_out, fd->dma_para->fld_output_va, FLD_MAX_OUT);
|
|
memcpy((char *)&(aie_cfg->fld_input[0]), (char *)fd->fld_para->fld_input,
|
|
sizeof(struct FLD_CROP_RIP_ROP) * aie_cfg->fld_face_num);
|
|
|
|
}
|