#include <metal_stdlib>
using namespace metal;

// shader 6a84889d491efb52
// Shader dumped from BotW 208, using Cemu 2193a8c
// Based on 4721609a424e9a1f_0000000000000000_vs.txt
// Used for: Restoring the native BotW Anti-Aliasing implementation

// Horizontal and vertical scale factors: the ratio of $width to $gameWidth
// and of $height to $gameHeight. The $-prefixed tokens are not Metal syntax;
// presumably they are substituted with numeric values before this file is
// compiled (TODO confirm against the tool that consumes this file).
// main0 divides the supportBuffer.remapped[4] components by these factors.
constant float resXScale = float($width) / float($gameWidth);
constant float resYScale = float($height) / float($gameHeight);

// Stores _v in out.position and then replaces z with (z + w) / 2.
// Expects a variable named `out` with a `position` member to be in scope at
// the point of use.
// The body is wrapped in do { } while (0) so that both assignments act as a
// single statement (e.g. under an unbraced if/else), and _v is parenthesised
// so that any expression can be passed safely. Use as: SET_POSITION(expr);
#define SET_POSITION(_v)                                                       \
    do {                                                                       \
        out.position = (_v);                                                   \
        out.position.z = (out.position.z + out.position.w) / 2.0;              \
    } while (0)
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
// Constant data bound at buffer(0) of main0.
// main0 reads remapped[0].z, remapped[1].z, remapped[2].z, remapped[3].xy and
// remapped[4].xyzw, reinterpreting each int as a float via as_type<float>.
struct SupportBuffer {
    // Five int4 slots holding raw 32-bit patterns.
    int4 remapped[5];
};

// Per-vertex stage_in data for main0: two raw uint4 attributes that main0
// decodes by hand (byte swaps and bit-field extraction).
struct VertexIn {
    // Attribute 0: main0 byte-swaps the low 16 bits of each component and
    // reinterprets component pairs as half2.
    uint4 attrDataSem0 [[attribute(0)]];
    // Attribute 1: main0 only reads .x, byte-swaps all 32 bits and unpacks
    // 10/10/10/2-bit fields from it.
    uint4 attrDataSem1 [[attribute(1)]];
};

// Values returned by main0: the position plus three float4 outputs tagged
// with user(locn*) attributes.
struct VertexOut {
    // Written through SET_POSITION in main0.
    float4 position [[position]] [[invariant]];
    // Written from register R3i in main0.
    float4 passParameterSem4 [[user(locn1)]];
    // Written from register R5i in main0.
    float4 passParameterSem0 [[user(locn0)]];
    // Written from register R6i in main0.
    float4 passParameterSem6 [[user(locn2)]];
};

// end of shader inputs/outputs
// Emulated comparison sample: fetches the texel at `coord` and yields 1.0 when
// `compareValue` is strictly less than the texel's x component, 0.0 otherwise.
template <typename TextureT, typename CoordT>
float sampleCompareEmulate(TextureT tex, sampler samplr, CoordT coord,
                           float compareValue) {
    const auto texel = tex.sample(samplr, coord);
    if (compareValue < texel.x) {
        return 1.0;
    }
    return 0.0;
}
// Queries the unclamped level of detail for `coord` and returns it split in
// two: x holds floor(lod), y holds fract(lod).
template <typename TextureT, typename CoordT>
float2 textureCalculateLod(TextureT tex, sampler samplr, CoordT coord) {
    const float unclampedLod = tex.calculate_unclamped_lod(samplr, coord);
    const float wholePart = floor(unclampedLod);
    const float fractionalPart = fract(unclampedLod);
    return float2(wholePart, fractionalPart);
}
// Treats the bits of `v` as a float, clamps that float to [0, 1] and returns
// the bit pattern of the result. Two bit patterns bypass clamp():
// 0x7FFFFFFF yields the bits of 1.0 and 0xFFFFFFFF yields the bits of 0.0.
int clampFI32(int v) {
    float clamped;
    if (v == 0x7FFFFFFF) {
        clamped = 1.0;
    } else if (v == 0xFFFFFFFF) {
        clamped = 0.0;
    } else {
        clamped = clamp(as_type<float>(v), 0.0, 1.0);
    }
    return as_type<int>(clamped);
}
// Multiplication in which a zero operand always wins: if either `a` or `b`
// compares equal to zero the result is 0.0, even when the other operand is
// infinite or NaN. Otherwise this is the ordinary product a * b.
float mul_nonIEEE(float a, float b) {
    const bool bothNonZero = (a != 0.0) && (b != 0.0);
    if (bothNonZero) {
        return a * b;
    }
    return 0.0;
}
// Vertex entry point. Produces a position and three float4 outputs from the
// vertex id and the values in supportBuffer.
//
// The body is written in a register-machine style: the R*i / PV* / PS* locals
// hold raw 32-bit patterns, and floats are moved in and out of them with
// as_type. Statement order matters because registers are reused; the numbered
// comments (// 0, // 1, ...) and "// export" markers come with the original
// code.
//
// Position written for each vertex id (before the z remap in SET_POSITION):
//   vid == 0      -> x = -1.0, y =  1.0
//   vid == 1      -> x = -1.0, y = -3.0
//   any other vid -> x =  3.0, y =  1.0
// with w = 1.0 and z = (remapped[0].z * as_type<float>(0x3b808081) - 0.5) * 2.
vertex VertexOut main0(uint vid [[vertex_id]], uint iid [[instance_id]],
                       VertexIn in [[stage_in]],
                       constant SupportBuffer &supportBuffer [[buffer(0)]],
                       texture2d<float> tex0 [[texture(0)]],
                       sampler samplr0 [[sampler(0)]]) {
    VertexOut out;
    // Working registers, all zero-initialised.
    int4 R0i = int4(0);
    int4 R1i = int4(0);
    int4 R2i = int4(0);
    int4 R3i = int4(0);
    int4 R4i = int4(0);
    int4 R5i = int4(0);
    int4 R6i = int4(0);
    int4 R123i = int4(0);
    int4 R124i = int4(0);
    int4 R125i = int4(0);
    int4 R126i = int4(0);
    int4 R127i = int4(0);
    uint4 attrDecoder;
    // Several of the scalars below (e.g. backupReg2i..4i, PS1i, tempi,
    // tempResultf, tempResulti, ARi, predResult) are declared but not used in
    // this function.
    int backupReg0i, backupReg1i, backupReg2i, backupReg3i, backupReg4i;
    int PV0ix = 0, PV0iy = 0, PV0iz = 0, PV0iw = 0, PV1ix = 0, PV1iy = 0,
        PV1iz = 0, PV1iw = 0;
    int PS0i = 0, PS1i = 0;
    int4 tempi = int4(0);
    float tempResultf;
    int tempResulti;
    int4 ARi = int4(0);
    bool predResult = true;
    // R0i.x carries the vertex id, R0i.w the instance id.
    R0i = int4(vid, 0, 0, iid);
    // Attribute 0: swap the two low bytes of every component, then read the
    // (x,y) and (z,w) pairs as half2 values and widen them to float bits.
    attrDecoder.xyzw = in.attrDataSem0.xyzw;
    attrDecoder = ((attrDecoder >> 8) & 0xFF) | ((attrDecoder << 8) & 0xFF00);
    attrDecoder.xyzw = as_type<uint4>(
        float4(float2(as_type<half2>(attrDecoder.x | (attrDecoder.y << 16))),
               float2(as_type<half2>(attrDecoder.z | (attrDecoder.w << 16)))));
    R1i = int4(int(attrDecoder.x), int(attrDecoder.y), int(attrDecoder.z),
               int(attrDecoder.w));
    // Attribute 1: reverse the byte order of .x, split it into 10/10/10/2-bit
    // fields, sign-extend the three 10-bit fields, scale them by 1/511 with a
    // lower bound of -1.0, and convert the 2-bit field to float.
    attrDecoder.x = in.attrDataSem1.x;
    attrDecoder.x = (attrDecoder.x >> 24) | ((attrDecoder.x >> 8) & 0xFF00) |
                    ((attrDecoder.x << 8) & 0xFF0000) | ((attrDecoder.x << 24));
    attrDecoder.y = 0;
    attrDecoder.z = 0;
    attrDecoder.w = 0;
    attrDecoder.xyzw =
        uint4((attrDecoder.x >> 0) & 0x3FF, (attrDecoder.x >> 10) & 0x3FF,
              (attrDecoder.x >> 20) & 0x3FF, (attrDecoder.x >> 30) & 0x3);
    if ((attrDecoder.x & 0x200) != 0)
        attrDecoder.x |= 0xFFFFFC00;
    if ((attrDecoder.y & 0x200) != 0)
        attrDecoder.y |= 0xFFFFFC00;
    if ((attrDecoder.z & 0x200) != 0)
        attrDecoder.z |= 0xFFFFFC00;
    attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x)) / 511.0, -1.0));
    attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y)) / 511.0, -1.0));
    attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z)) / 511.0, -1.0));
    attrDecoder.w = as_type<uint>(float(attrDecoder.w));
    R2i = int4(int(attrDecoder.x), int(attrDecoder.y), int(attrDecoder.z),
               int(attrDecoder.w));
    // NOTE(review): every component of R1i and R2i loaded above is overwritten
    // further down before anything reads it, so the decoded attribute values
    // do not appear to influence the outputs.
    //
    // Float bit patterns used below: 0x3f800000 = 1.0, 0xbf800000 = -1.0,
    // 0x40400000 = 3.0, 0xc0400000 = -3.0, 0x3b808081 ~= 1/255,
    // 0x3d2aaaab ~= 1/24, 0x3e000000 = 0.125.
    // 0
    // PV0ix is -1 (all bits set) when the vertex id is 1, otherwise 0.
    PV0ix = (R0i.x == int(1)) ? -1 : 0;
    R127i.y = int(0x3f800000);
    PV0iz = int(0x3f800000);
    PV0iw = int(0x40400000);
    R127i.z = int(0xbf800000);
    // 1
    PV1ix = as_type<int>(as_type<float>(supportBuffer.remapped[0].z) *
                         as_type<float>(0x3b808081));
    // Candidate x/y for vertex ids other than 0: (-1,-3) for id 1, else (3,1).
    R123i.y = ((PV0ix == 0) ? (PV0iw) : (int(0xbf800000)));
    R0i.z = 0;
    R123i.w = ((PV0ix == 0) ? (PV0iz) : (int(0xc0400000)));
    R1i.w = int(0x3f800000);
    // 2
    // Final position x/y: (-1, 1) for vertex id 0, otherwise the candidate.
    R1i.x = ((R0i.x == 0) ? (R127i.z) : (R123i.y));
    R1i.y = ((R0i.x == 0) ? (R127i.y) : (R123i.w));
    // Position z = (PV1ix - 0.5) * 2.
    R1i.z = as_type<int>(as_type<float>(PV1ix) + -(0.5));
    R1i.z = as_type<int>(as_type<float>(R1i.z) * 2.0);
    // R0i is repurposed from here on: it now holds the texture coordinates
    // used by the two samples below, no longer the vertex/instance ids.
    R0i.w = int(0x3d2aaaab);
    R0i.x = int(0x3e000000);
    // 3
    // R5i.x = position.x * 0.5 + 0.5
    R5i.x = as_type<int>(as_type<float>(R1i.x) * 0.5 + 0.5);
    R0i.y = 0;
    PV1iz = as_type<int>(-(as_type<float>(R1i.y)));
    // R4i.w = remapped[1].z / 2, R4i.x = -remapped[2].z / 2
    R4i.w = supportBuffer.remapped[1].z;
    R4i.w = as_type<int>(as_type<float>(R4i.w) / 2.0);
    R4i.x = as_type<int>(-(as_type<float>(supportBuffer.remapped[2].z)));
    R4i.x = as_type<int>(as_type<float>(R4i.x) / 2.0);
    // 4
    // R3i.x = -position.y / 2, R3i.w = position.x / 2
    R3i.x = as_type<int>(-(as_type<float>(R1i.y)));
    R3i.x = as_type<int>(as_type<float>(R3i.x) / 2.0);
    // R5i.y = -position.y * 0.5 + 0.5
    R5i.y = as_type<int>(as_type<float>(PV1iz) * 0.5 + 0.5);
    R6i.z = R5i.x;
    R3i.w = R1i.x;
    R3i.w = as_type<int>(as_type<float>(R3i.w) / 2.0);
    R3i.y = as_type<int>(as_type<float>(R4i.w) + -(0.5));
    // Two samples of tex0 at fixed coordinates (R0i.w, R0i.z) and
    // (R0i.x, R0i.y).
    // NOTE(review): R2i.y is overwritten below before being read and
    // R2i.x/z/w are not read by any later statement in this function.
    R2i.w = as_type<int>(tex0.sample(samplr0, float2(as_type<float>(R0i.w),
                                                     as_type<float>(R0i.z)))
                             .x);
    R2i.xyz = as_type<int3>(tex0.sample(samplr0, float2(as_type<float>(R0i.x),
                                                        as_type<float>(R0i.y)))
                                .xyz);
    // export
    // Writes the position; SET_POSITION also replaces z with (z + w) / 2.
    SET_POSITION(float4(as_type<float>(R1i.x), as_type<float>(R1i.y),
                        as_type<float>(R1i.z), as_type<float>(R1i.w)));
    // export
    // skipped export to semanticId 255
    // 0
    R127i.x = as_type<int>(as_type<float>(R4i.x) + -(0.5));
    // R2i.y = -remapped[3].x * 2, R127i.z = remapped[3].y * 2
    R2i.y = as_type<int>(-(as_type<float>(supportBuffer.remapped[3].x)));
    R2i.y = as_type<int>(as_type<float>(R2i.y) * 2.0);
    R127i.z = supportBuffer.remapped[3].y;
    R127i.z = as_type<int>(as_type<float>(R127i.z) * 2.0);
    R6i.w = R5i.y;
    PS0i = as_type<int>(as_type<float>(R3i.w) + as_type<float>(R4i.w));
    // 1
    PV1ix = as_type<int>(as_type<float>(R3i.x) + as_type<float>(R4i.x));
    R126i.y = as_type<int>(as_type<float>(R5i.x) + as_type<float>(R3i.y));
    // From here on, every read of remapped[4] is divided by resXScale (x, z
    // components) or resYScale (y, w components).
    PV1iz =
        as_type<int>(as_type<float>(R6i.w) +
                     as_type<float>(supportBuffer.remapped[4].w) / resYScale);
    R127i.w =
        as_type<int>(as_type<float>(R6i.z) +
                     as_type<float>(supportBuffer.remapped[4].z) / resXScale);
    R127i.y =
        as_type<int>(mul_nonIEEE(as_type<float>(R2i.y), as_type<float>(PS0i)));
    // 2
    PV0ix = as_type<int>(as_type<float>(PV1iz) + as_type<float>(R127i.x));
    R125i.y = as_type<int>(
        mul_nonIEEE(as_type<float>(R127i.z), as_type<float>(PV1ix)));
    R126i.z =
        as_type<int>(as_type<float>(supportBuffer.remapped[4].y) / resYScale);
    R126i.z = as_type<int>(as_type<float>(R126i.z) / 2.0);
    R126i.w =
        as_type<int>(as_type<float>(supportBuffer.remapped[4].x) / resXScale);
    R126i.w = as_type<int>(as_type<float>(R126i.w) / 2.0);
    R124i.y =
        as_type<int>(as_type<float>(supportBuffer.remapped[4].z) / resXScale);
    R124i.y = as_type<int>(as_type<float>(R124i.y) * 2.0);
    // 3
    // R127i.x and R127i.y are about to be overwritten; keep their previous
    // values for the reads further down in this group.
    backupReg0i = R127i.x;
    backupReg1i = R127i.y;
    R127i.x =
        as_type<int>(as_type<float>(supportBuffer.remapped[4].w) / resYScale);
    R127i.x = as_type<int>(as_type<float>(R127i.x) * 2.0);
    R127i.y = as_type<int>(
        mul_nonIEEE(as_type<float>(R127i.z), as_type<float>(PV0ix)));
    PV1iz = as_type<int>(as_type<float>(R5i.y) + as_type<float>(backupReg0i));
    R127i.w = as_type<int>(as_type<float>(R127i.w) + as_type<float>(R3i.y));
    R5i.z = backupReg1i;
    // 4
    R3i.x = as_type<int>(
        mul_nonIEEE(as_type<float>(R5i.x), as_type<float>(R126i.w)) + 0.5);
    R4i.y = as_type<int>(
        mul_nonIEEE(as_type<float>(R127i.z), as_type<float>(PV1iz)));
    PV0iw = as_type<int>(
        mul_nonIEEE(as_type<float>(R2i.y), as_type<float>(R126i.y)));
    R5i.w = R125i.y;
    // 5
    R4i.x = as_type<int>(
        mul_nonIEEE(as_type<float>(R2i.y), as_type<float>(R127i.w)));
    R3i.y = as_type<int>(
        mul_nonIEEE(as_type<float>(R5i.y), as_type<float>(R126i.z)) + 0.5);
    R3i.z = R124i.y;
    R3i.w = R127i.x;
    R4i.z = PV0iw;
    // 6
    R4i.w = R127i.y;
    // NOTE(review): R4i is fully computed above but is not written to any
    // member of `out` in this function.
    // export
    out.passParameterSem4 =
        float4(as_type<float>(R3i.x), as_type<float>(R3i.y),
               as_type<float>(R3i.z), as_type<float>(R3i.w));
    // export
    // skipped export to semanticId 255
    // export
    out.passParameterSem0 =
        float4(as_type<float>(R5i.x), as_type<float>(R5i.y),
               as_type<float>(R5i.z), as_type<float>(R5i.w));
    // export
    // NOTE(review): the first two components both read R6i.x, which is never
    // written after its zero initialisation (nor is R6i.y), so they are 0.0
    // either way; z and w are copies of R5i.x and R5i.y.
    out.passParameterSem6 =
        float4(as_type<float>(R6i.x), as_type<float>(R6i.x),
               as_type<float>(R6i.z), as_type<float>(R6i.w));
    return out;
}
