// Based on the original salis-v1 VM architecture:
// https://git.pauloliver.dev/salis-v1/about/

// Instruction opcodes. INST_SET is an X-macro list: every INST(...) entry
// contributes one enumerator named after its `label` field, in list order,
// so the first entry gets value 0.
enum {
#define INST(index, label, mnemonic, symbol) label,
    INST_SET
#undef INST
};

#if (defined(COMMAND_BENCH) || defined(COMMAND_NEW)) && defined(ANC_BYTES)
// Sets up the first CLONES processes of `core` as ancestors. Ancestor number
// `n` gets a memory block 0 of ANC_SIZE cells starting at
// base + (MVEC_SIZE / CLONES) * n, with both its instruction pointer and its
// seek pointer placed on the first cell of that block. `base` is UINT64_HALF
// when MVEC_LOOP is defined and 0 otherwise.
void arch_core_init(struct Core *core) {
    assert(core);

#if defined(MVEC_LOOP)
    const uint64_t base = UINT64_HALF;
#else
    const uint64_t base = 0;
#endif

    for (uint64_t clone = 0; clone < CLONES; ++clone) {
        // Clones are spread evenly over the memory vector
        const uint64_t start = base + (MVEC_SIZE / CLONES) * clone;
        struct Proc *anc = proc_fetch(core, clone);

        anc->mb0a = start;
        anc->mb0s = ANC_SIZE;
        anc->ip = start;
        anc->sp = start;
    }
}
#endif

// Arch-specific teardown hook for a core. Nothing is released here; the
// function only checks that `core` is non-NULL.
void arch_core_free(struct Core *core) {
    // Keeps the parameter "used" when assertions are compiled out
    (void)core;

    assert(core);
}

#if defined(COMMAND_LOAD) || defined(COMMAND_NEW)
// Writes the arch-specific counters of `core` to `f` as raw uint64_t values,
// in this order: iexe[INST_COUNT], iwrt[INST_COUNT], wmb0, wmb1, wdea.
// arch_core_load() reads them back in the same order.
// NOTE(review): the fwrite() results are not checked, so short writes go
// unnoticed.
void arch_core_save(FILE *f, const struct Core *core) {
    assert(f);
    assert(core);

    const size_t word = sizeof(uint64_t);

    // Per-instruction counters
    fwrite(core->iexe, word, INST_COUNT, f);
    fwrite(core->iwrt, word, INST_COUNT, f);

    // Scalar write counters
    fwrite(&core->wmb0, word, 1, f);
    fwrite(&core->wmb1, word, 1, f);
    fwrite(&core->wdea, word, 1, f);

#if defined(DATA_PUSH_PATH)
    //fwrite(core->weva, sizeof(uint64_t), MVEC_SIZE, f);
#endif
}
#endif

#if defined(COMMAND_LOAD)
// Reads the arch-specific counters of `core` from `f` as raw uint64_t values,
// in this order: iexe[INST_COUNT], iwrt[INST_COUNT], wmb0, wmb1, wdea.
// This mirrors the layout written by arch_core_save().
// NOTE(review): the fread() results are not checked, so a truncated file
// leaves the remaining fields untouched without any diagnostic.
void arch_core_load(FILE *f, struct Core *core) {
    assert(f);
    assert(core);

    const size_t word = sizeof(uint64_t);

    // Per-instruction counters
    fread(core->iexe, word, INST_COUNT, f);
    fread(core->iwrt, word, INST_COUNT, f);

    // Scalar write counters
    fread(&core->wmb0, word, 1, f);
    fread(&core->wmb1, word, 1, f);
    fread(&core->wdea, word, 1, f);

#if defined(DATA_PUSH_PATH)
    //fread(core->weva, sizeof(uint64_t), MVEC_SIZE, f);
#endif
}
#endif

uint64_t arch_proc_mb0_addr(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb0a;
}

uint64_t arch_proc_mb0_size(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb0s;
}

uint64_t arch_proc_mb1_addr(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb1a;
}

uint64_t arch_proc_mb1_size(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb1s;
}

uint64_t arch_proc_ip_addr(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->ip;
}

uint64_t arch_proc_sp_addr(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->sp;
}

// Returns the slice value of live process `pix`. This architecture gives
// every process the same constant value, 1.
// NOTE(review): presumably the number of steps a process runs per turn;
// confirm against the caller.
uint64_t arch_proc_slice(const struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    // Both parameters are only inspected by the assertions above
    (void)pix;
    (void)core;

    const uint64_t slice = 1;

    return slice;
}

// Calls mvec_free() on each of the `size` consecutive addresses starting at
// `addr`, in ascending order. `size` must be non-zero.
void _free_memory_block(struct Core *core, uint64_t addr, uint64_t size) {
    assert(core);
    assert(size);

    // One-past-the-end address; unsigned wrap-around is intended here
    const uint64_t end = addr + size;

    for (uint64_t cur = addr; cur != end; ++cur) {
        mvec_free(core, cur);
    }
}

// Arch-specific hook for killing the process at index core->pfst: frees its
// memory block 0, frees its memory block 1 if it has one, then overwrites the
// process record with g_dead_proc. Requires more than one process in `core`.
void arch_on_proc_kill(struct Core *core) {
    assert(core);
    assert(core->pnum > 1);

    struct Proc *victim = proc_fetch(core, core->pfst);

    _free_memory_block(core, victim->mb0a, victim->mb0s);

    // Block 1 only exists while its size is non-zero
    if (victim->mb1s != 0) {
        _free_memory_block(core, victim->mb1a, victim->mb1s);
    }

    memcpy(victim, &g_dead_proc, sizeof *victim);
}

// Reads the value stored at `addr` through mvec_get_inst() and reduces it
// modulo INST_COUNT, so the result is always a valid opcode.
uint8_t _get_inst(const struct Core *core, uint64_t addr) {
    assert(core);

    return (uint8_t)(mvec_get_inst(core, addr) % INST_COUNT);
}

// Advances the instruction pointer of live process `pix` by one cell and
// resets its seek pointer onto the new instruction pointer.
void _increment_ip(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *self = proc_fetch(core, pix);

    self->sp = ++self->ip;
}

// Returns true when opcode `inst` lies in the inclusive range [lo, hi].
// All three arguments must be valid opcodes and `lo` must be below `hi`.
bool _is_between(uint8_t inst, uint8_t lo, uint8_t hi) {
    assert(inst < INST_COUNT);
    assert(lo < INST_COUNT);
    assert(hi < INST_COUNT);
    assert(lo < hi);

    return !(inst < lo || inst > hi);
}

// Returns true when `inst` is one of the key opcodes (keya through keyp).
bool _is_key(uint8_t inst) {
    assert(inst < INST_COUNT);

    const bool in_key_range = _is_between(inst, keya, keyp);

    return in_key_range;
}

// Returns true when `inst` is one of the lock opcodes (loka through lokp).
bool _is_lock(uint8_t inst) {
    assert(inst < INST_COUNT);

    const bool in_lock_range = _is_between(inst, loka, lokp);

    return in_lock_range;
}

// Returns true when `inst` is a register modifier (nop0 through nop3).
bool _is_rmod(uint8_t inst) {
    assert(inst < INST_COUNT);

    const bool in_rmod_range = _is_between(inst, nop0, nop3);

    return in_rmod_range;
}

// Returns true when `lock` sits at the same offset from loka as `key` sits
// from keya. `key` must be a key opcode; `lock` may be any valid opcode.
bool _key_lock_match(uint8_t key, uint8_t lock) {
    assert(key < INST_COUNT);
    assert(lock < INST_COUNT);
    assert(_is_key(key));

    const int key_offset = key - keya;
    const int lock_offset = lock - loka;

    return key_offset == lock_offset;
}

// Performs one step of a key/lock search for live process `pix`.
//  - If the instruction right after ip is not a key, the search is abandoned:
//    ip is incremented and false is returned.
//  - If the instruction under sp matches that key, true is returned and no
//    state is changed.
//  - Otherwise sp moves one cell forward (`fwrd`) or backward and false is
//    returned.
bool _seek(struct Core *core, uint64_t pix, bool fwrd) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    const uint8_t key = _get_inst(core, proc->ip + 1);

    if (!_is_key(key)) {
        _increment_ip(core, pix);
        return false;
    }

    const uint8_t candidate = _get_inst(core, proc->sp);

    if (_key_lock_match(key, candidate)) {
        return true;
    }

    // No match yet: keep seeking in the requested direction
    if (!fwrd) {
        proc->sp--;
    } else {
        proc->sp++;
    }

    return false;
}

// Completes a jump for live process `pix` by moving its instruction pointer
// onto its seek pointer. Debug builds check that the instruction after ip is
// a key and that the instruction under sp is the matching lock.
void _jump(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

#if !defined(NDEBUG)
    uint8_t key = _get_inst(core, proc->ip + 1);
    uint8_t lock = _get_inst(core, proc->sp);
    assert(_is_key(key));
    assert(_is_lock(lock));
    assert(_key_lock_match(key, lock));
#endif

    proc->ip = proc->sp;
}

// Resolves the register operands of the instruction at ip of live process
// `pix`. On return, rlist[0..rcount-1] point at registers of that process.
//
// Register modifiers (nop0..nop3) are read from consecutive cells starting at
// ip + 1, or ip + 2 when `offset` is set. Modifier number `n` selects the
// register for rlist[n]. Reading stops at the first cell that is not a
// modifier; that slot and all later ones keep the default register r0x.
//
// `rcount` must be between 1 and 3.
void _get_reg_addr_list(struct Core *core, uint64_t pix, uint64_t **rlist, int rcount, bool offset) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    assert(rlist);
    assert(rcount);
    assert(rcount < 4);

    struct Proc *proc = proc_fetch(core, pix);
    const uint64_t first = proc->ip + (offset ? 2 : 1);

    // Every operand defaults to r0x
    for (int slot = 0; slot < rcount; ++slot) {
        rlist[slot] = &proc->r0x;
    }

    for (int slot = 0; slot < rcount; ++slot) {
        const uint8_t mod = _get_inst(core, first + slot);

        if (!_is_rmod(mod)) {
            break;
        }

        if (mod == nop0) {
            rlist[slot] = &proc->r0x;
        } else if (mod == nop1) {
            rlist[slot] = &proc->r1x;
        } else if (mod == nop2) {
            rlist[slot] = &proc->r2x;
        } else if (mod == nop3) {
            rlist[slot] = &proc->r3x;
        }
    }
}

// Completes an address search for live process `pix`: stores the seek pointer
// in the register selected by the modifier at ip + 2 (r0x by default), then
// advances ip. Debug builds check that the instruction after ip is a key and
// that the instruction under sp is the matching lock.
void _addr(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

#if !defined(NDEBUG)
    uint8_t key = _get_inst(core, proc->ip + 1);
    uint8_t lock = _get_inst(core, proc->sp);
    assert(_is_key(key));
    assert(_is_lock(lock));
    assert(_key_lock_match(key, lock));
#endif

    // The key occupies ip + 1, so the modifier lookup starts one cell later
    uint64_t *dest;
    _get_reg_addr_list(core, pix, &dest, 1, true);

    *dest = proc->sp;

    _increment_ip(core, pix);
}

// Conditional skip for live process `pix`. The tested register is selected by
// the modifier at ip + 1 (r0x by default). ip advances past that modifier if
// one is present, then by 1 when the register is non-zero or by 2 when it is
// zero. The seek pointer is reset onto the new ip.
void _ifnz(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *cond;

    _get_reg_addr_list(core, pix, &cond, 1, false);

    // Base step: execute the next instruction (1) or skip over it (2)
    uint64_t step = (*cond != 0) ? 1 : 2;

    // A register modifier after the instruction is stepped over as well
    if (_is_rmod(_get_inst(core, proc->ip + 1))) {
        step += 1;
    }

    proc->ip += step;
    proc->sp = proc->ip;
}

// Frees memory block 1 of live process `pix` and clears its address and size
// fields. The process must currently hold a block 1 (mb1s != 0).
void _free_child_memory_of(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *owner = proc_fetch(core, pix);

    assert(owner->mb1s);

    _free_memory_block(core, owner->mb1a, owner->mb1s);

    owner->mb1s = 0;
    owner->mb1a = 0;
}

// Performs one step of an incremental allocation of memory block 1 (the
// "child" block) for live process `pix`. The block grows by at most one cell
// per call, at the seek pointer, forward (`fwrd`) or backward.
//
// Operands (register modifiers after the instruction, r0x by default):
//   regs[0] - requested block size
//   regs[1] - receives the block address once allocation completes
//
// Each call ends in exactly one of these outcomes:
//   - requested size is zero: ip is incremented, nothing else happens
//   - a block exists but sp is not adjacent to it: ip is incremented
//   - the block has reached the requested size: ip is incremented and the
//     block address is stored in regs[1]
//   - the cell under sp is already allocated: any partial block is freed and
//     sp moves one cell on (ip is not advanced)
//   - otherwise the cell under sp is allocated and appended to the block,
//     and sp moves one cell on (ip is not advanced)
void _alloc(struct Core *core, uint64_t pix, bool fwrd) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *regs[2];

    _get_reg_addr_list(core, pix, regs, 2, false);

    uint64_t bsize = *regs[0];

    // Do nothing if block-size is zero
    if (!bsize) {
        _increment_ip(core, pix);
        return;
    }

    // Do nothing if seek pointer is not adjacent to allocated memory block
    if (proc->mb1s) {
        // Forward growth expects sp one past the block's last cell; backward
        // growth expects sp one cell before the block's first cell
        uint64_t exp_addr = proc->mb1a;

        if (fwrd) {
            exp_addr += proc->mb1s;
        } else {
            exp_addr--;
        }

        if (proc->sp != exp_addr) {
            _increment_ip(core, pix);
            return;
        }
    }

    // Allocation was successful, store block address on register
    if (proc->mb1s == bsize) {
        _increment_ip(core, pix);
        *regs[1] = proc->mb1a;
        return;
    }

    // Seek pointer collided with another allocated block, discard and keep looking
    if (mvec_is_alloc(core, proc->sp)) {
        if (proc->mb1s) {
            _free_child_memory_of(core, pix);
        }

        if (fwrd) {
            proc->sp++;
        } else {
            proc->sp--;
        }

        return;
    }

    // Free (non-allocated) byte found, enlarge child block 1 byte
    mvec_alloc(core, proc->sp);

    // The block address tracks its lowest cell: it is set on the first cell
    // and on every cell added while growing backward
    if (!proc->mb1s || !fwrd) {
        proc->mb1a = proc->sp;
    }

    proc->mb1s++;

    // Advance seek pointer
    if (fwrd) {
        proc->sp++;
    } else {
        proc->sp--;
    }
}

// Exchanges memory blocks 0 and 1 (address and size) of live process `pix`
// when it holds a block 1; does nothing to the blocks otherwise. ip is
// advanced in both cases.
void _bswap(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

    if (proc->mb1s) {
        const uint64_t child_addr = proc->mb1a;
        const uint64_t child_size = proc->mb1s;

        proc->mb1a = proc->mb0a;
        proc->mb1s = proc->mb0s;
        proc->mb0a = child_addr;
        proc->mb0s = child_size;
    }

    _increment_ip(core, pix);
}

void _bclear(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

    if (proc->mb1s) {
        _free_child_memory_of(core, pix);
    }

    _increment_ip(core, pix);
}

void _split(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

    if (proc->mb1s) {
        struct Proc child = {0};

        child.ip = proc->mb1a;
        child.sp = proc->mb1a;
        child.mb0a = proc->mb1a;
        child.mb0s = proc->mb1s;

        proc->mb1a = 0;
        proc->mb1s = 0;

        // A new organism is born :)
        proc_new(core, &child);
    } else {
        assert(!proc->mb1a);
    }

    _increment_ip(core, pix);
}

// Executes a three-register arithmetic instruction (addn, subn, muln or divn)
// for live process `pix`: first = second <op> third, where the three
// registers are chosen by up to three modifiers after the instruction. A
// division by zero leaves the destination unchanged. ip is then advanced.
void _3rop(struct Core *core, uint64_t pix, uint8_t inst) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    uint64_t *regs[3];

    _get_reg_addr_list(core, pix, regs, 3, false);

    // Any combination of registers is allowed, including repeats
    uint64_t *dst = regs[0];
    const uint64_t *lhs = regs[1];
    const uint64_t *rhs = regs[2];

    switch (inst) {
    case addn:
        *dst = *lhs + *rhs;
        break;
    case subn:
        *dst = *lhs - *rhs;
        break;
    case muln:
        *dst = *lhs * *rhs;
        break;
    case divn:
        // Dividing by zero is a no-op
        if (*rhs != 0) {
            *dst = *lhs / *rhs;
        }

        break;
    default:
        assert(false);
    }

    _increment_ip(core, pix);
}

// Executes a single-register instruction for live process `pix` on the
// register chosen by the modifier after the instruction (r0x by default):
//   incn / decn  add / subtract one
//   notn         logical not (zero becomes 1, anything else becomes 0)
//   shfl / shfr  shift left / right by one bit
//   zero / unit  load the constant 0 / 1
// ip is then advanced.
void _1rop(struct Core *core, uint64_t pix, uint8_t inst) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    uint64_t *target;

    _get_reg_addr_list(core, pix, &target, 1, false);

    switch (inst) {
    case incn:
        *target += 1;
        break;
    case decn:
        *target -= 1;
        break;
    case notn:
        *target = (*target == 0);
        break;
    case shfl:
        *target = *target << 1;
        break;
    case shfr:
        *target = *target >> 1;
        break;
    case zero:
        *target = 0;
        break;
    case unit:
        *target = 1;
        break;
    default:
        assert(false);
    }

    _increment_ip(core, pix);
}

// Pushes a register of live process `pix` onto its eight-slot stack
// (s0 is the top, s7 the bottom). The register is chosen by the modifier
// after the instruction (r0x by default). The previous content of s7 is
// discarded. ip is then advanced.
void _push(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *reg;

    _get_reg_addr_list(core, pix, &reg, 1, false);

    // Shift every slot one position down, starting from the bottom so that no
    // value is overwritten before it has been copied
    proc->s7 = proc->s6;
    proc->s6 = proc->s5;
    proc->s5 = proc->s4;
    proc->s4 = proc->s3;
    proc->s3 = proc->s2;
    proc->s2 = proc->s1;
    proc->s1 = proc->s0;
    proc->s0 = *reg;

    _increment_ip(core, pix);
}

// Pops the top of the eight-slot stack (s0) of live process `pix` into the
// register chosen by the modifier after the instruction (r0x by default).
// The remaining slots move one position up and s7 is filled with zero.
// ip is then advanced.
void _pop(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *reg;

    _get_reg_addr_list(core, pix, &reg, 1, false);

    // Shift every slot one position up, starting from the top so that no
    // value is overwritten before it has been copied
    *reg = proc->s0;
    proc->s0 = proc->s1;
    proc->s1 = proc->s2;
    proc->s2 = proc->s3;
    proc->s3 = proc->s4;
    proc->s4 = proc->s5;
    proc->s5 = proc->s6;
    proc->s6 = proc->s7;
    proc->s7 = 0;

    _increment_ip(core, pix);
}

// Picks the direction in which `src` must step to reach `dst` over the
// shorter path in wrap-around (modulo 2^64) address space.
// Returns 0 when both are equal, -1 to step backward and 1 to step forward.
// When both paths are equally long, backward (-1) is chosen.
int _sp_dir(uint64_t src, uint64_t dst) {
    if (src == dst) {
        return 0;
    }

    // Unsigned subtraction wraps, which yields the distance in each direction
    const uint64_t dist_backward = src - dst;
    const uint64_t dist_forward = dst - src;

    return (dist_backward <= dist_forward) ? -1 : 1;
}

// Performs one step of a memory read for live process `pix`.
// Operands (register modifiers after the instruction, r0x by default):
//   regs[0] - address to read from
//   regs[1] - receives the value found at that address
// While the seek pointer differs from the source address it moves one cell
// towards it and ip stays put. Once it is on the address, the value is read
// and ip is advanced.
void _load(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *regs[2];

    _get_reg_addr_list(core, pix, regs, 2, false);

    switch (_sp_dir(proc->sp, *regs[0])) {
    case 1:
        proc->sp++;
        break;
    case -1:
        proc->sp--;
        break;
    default:
        // Seek pointer has arrived at the source address
        *regs[1] = mvec_get_inst(core, *regs[0]);
        _increment_ip(core, pix);
        break;
    }
}

// Returns true when live process `pix` may write to `addr`: either the cell
// is not allocated at all, or it is owned by that process.
bool _is_writeable_by(const struct Core *core, uint64_t addr, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    // Unallocated cells are open to everyone
    if (!mvec_is_alloc(core, addr)) {
        return true;
    }

    return mvec_is_proc_owner(core, addr, pix);
}

// Performs one step of a memory write for live process `pix`.
// Operands (register modifiers after the instruction, r0x by default):
//   regs[0] - destination address
//   regs[1] - value to write
// While the seek pointer differs from the destination it moves one cell
// towards it and ip stays put. Once it is on the destination, the value
// (modulo INST_CAP) is written if the process may write there, the write
// statistics of `core` are updated, and ip is advanced. ip advances even when
// the write is refused.
void _write(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *regs[2];

    _get_reg_addr_list(core, pix, regs, 2, false);

    const uint64_t *dest = regs[0];
    const uint64_t *value = regs[1];
    const int sp_dir = _sp_dir(proc->sp, *dest);

    // Still travelling towards the destination
    if (sp_dir == 1) {
        proc->sp++;
        return;
    }

    if (sp_dir == -1) {
        proc->sp--;
        return;
    }

    if (_is_writeable_by(core, *dest, pix)) {
        // Store write event, classified by written opcode and by target
        uint8_t inst = *value % INST_COUNT;

        ++core->iwrt[inst];

        if (mvec_is_in_mb0_of_proc(core, *dest, pix)) {
            ++core->wmb0;
        } else if (mvec_is_in_mb1_of_proc(core, *dest, pix)) {
            ++core->wmb1;
        } else {
            ++core->wdea;
        }

        // Write instruction
        mvec_set_inst(core, *dest, *value % INST_CAP);
    }

    _increment_ip(core, pix);
}

// Executes a two-register instruction for live process `pix` on the registers
// chosen by up to two modifiers after the instruction:
//   dupl  copies the first register into the second
//   swap  exchanges both registers
// ip is then advanced.
void _2rop(struct Core *core, uint64_t pix, uint8_t inst) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    uint64_t *regs[2];

    _get_reg_addr_list(core, pix, regs, 2, false);

    uint64_t *first = regs[0];
    uint64_t *second = regs[1];

    if (inst == dupl) {
        *second = *first;
    } else if (inst == swap) {
        const uint64_t held = *second;

        *second = *first;
        *first = held;
    } else {
        assert(false);
    }

    _increment_ip(core, pix);
}

// Executes one step of live process `pix`: fetches the opcode under its
// instruction pointer, counts it in core->iexe, and dispatches to the handler
// of that instruction. Opcodes without a handler (register modifiers, keys,
// locks, ...) simply advance ip.
//
// Several instructions (jumps, address searches, allocation, load, write)
// take more than one step to complete; their handlers leave ip unchanged
// until they are done.
void arch_proc_step(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    const uint8_t inst = _get_inst(core, proc->ip);

    // Store specific instruction execution event in database
    ++core->iexe[inst];

    // Execute instruction; directional pairs share one case and pass the
    // direction as "is this the forward variant?"
    switch (inst) {
    case jmpb:
    case jmpf:
        if (_seek(core, pix, inst == jmpf)) {
            _jump(core, pix);
        }

        break;
    case adrb:
    case adrf:
        if (_seek(core, pix, inst == adrf)) {
            _addr(core, pix);
        }

        break;
    case ifnz:
        _ifnz(core, pix);
        break;
    case allb:
    case allf:
        _alloc(core, pix, inst == allf);
        break;
    case bswp:
        _bswap(core, pix);
        break;
    case bclr:
        _bclear(core, pix);
        break;
    case splt:
        _split(core, pix);
        break;
    case addn:
    case subn:
    case muln:
    case divn:
        _3rop(core, pix, inst);
        break;
    case incn:
    case decn:
    case notn:
    case shfl:
    case shfr:
    case zero:
    case unit:
        _1rop(core, pix, inst);
        break;
    case pshn:
        _push(core, pix);
        break;
    case popn:
        _pop(core, pix);
        break;
    case load:
        _load(core, pix);
        break;
    case wrte:
        _write(core, pix);
        break;
    case dupl:
    case swap:
        _2rop(core, pix, inst);
        break;
    default:
        _increment_ip(core, pix);
        break;
    }
}

#if !defined(NDEBUG)
// Debug-only consistency check for process `pix`. Asserts that:
//   - memory block 0 has a non-zero size
//   - a non-zero block 1 address implies a non-zero block 1 size
//   - every cell of both blocks is allocated and owned by the process
void arch_validate_proc(const struct Core *core, uint64_t pix) {
    assert(core);

    const struct Proc *proc = proc_get(core, pix);

    assert(proc->mb0s);
    assert(!proc->mb1a || proc->mb1s);

    for (uint64_t off = 0; off < proc->mb0s; ++off) {
        const uint64_t cell = proc->mb0a + off;
        assert(mvec_is_alloc(core, cell));
        assert(mvec_is_proc_owner(core, cell, pix));
    }

    for (uint64_t off = 0; off < proc->mb1s; ++off) {
        const uint64_t cell = proc->mb1a + off;
        assert(mvec_is_alloc(core, cell));
        assert(mvec_is_proc_owner(core, cell, pix));
    }
}
#endif

// Returns the `symbol` field of the INST_SET entry whose index equals
// `inst` modulo INST_COUNT. The trailing assert/return is only reached if no
// entry matches.
wchar_t arch_symbol(uint8_t inst) {
    const uint8_t opcode = inst % INST_COUNT;

    switch (opcode) {
#define INST(index, label, mnemonic, symbol) case index: return symbol;
    INST_SET
#undef INST
    }

    assert(false);
    return L'\0';
}

// Returns the `mnemonic` field of the INST_SET entry whose index equals
// `inst` modulo INST_COUNT. The trailing assert/return is only reached if no
// entry matches.
const char *arch_mnemonic(uint8_t inst) {
    const uint8_t opcode = inst % INST_COUNT;

    switch (opcode) {
#define INST(index, label, mnemonic, symbol) case index: return mnemonic;
    INST_SET
#undef INST
    }

    assert(false);
    return NULL;
}

// ----------------------------------------------------------------------------
// Data aggregation functions
// ----------------------------------------------------------------------------
#if defined(DATA_PUSH_PATH)
#if defined(COMMAND_NEW)
// Creates the arch-specific tables through salis_exec_sql(). Requires
// g_sim_data to be set.
//
// Tables created:
//   arch_general    - for every core listed by FOR_CORES: cycl_<i>, wmb0_<i>,
//                     wmb1_<i>, wdea_<i>; plus a `step` column
//   pop_<c>, exe_<c>, wrt_<c> (one set per core index c below CORES)
//                   - cycl_<i> for every core, one column per INST_SET label,
//                     plus a `step` column
//
// The SQL text is assembled at compile time: FOR_CORES and INST_SET are
// X-macro lists expanded into adjacent string literals.
void arch_push_data_header() {
    assert(g_sim_data);

    g_info("Creating 'arch_general' table in SQLite database");
    salis_exec_sql(
        0, NULL, NULL,
        "create table arch_general ("
#define FOR_CORE(i) \
        "cycl_" #i " int not null, " \
        "wmb0_" #i " int not null, " \
        "wmb1_" #i " int not null, " \
        "wdea_" #i " int not null, "
        FOR_CORES
#undef FOR_CORE
        "step int not null"
        ");"
    );

    // Instruction events
    char *iprefs[] = { "pop", "exe", "wrt" };
    int iprefs_cnt = sizeof(iprefs) / sizeof(iprefs[0]);

    for (int i = 0; i < CORES; ++i) {
        for (int j = 0; j < iprefs_cnt; ++j) {
            g_info("Creating '%s_%d' table in SQLite database", iprefs[j], i);
            salis_exec_sql(
                0, NULL, NULL,
                "create table %s_%d ("
#define FOR_CORE(i) "cycl_" #i " int not null, "
                FOR_CORES
#undef FOR_CORE
#define INST(index, label, mnemonic, symbol) #label " int not null, "
                INST_SET
#undef INST
                "step int not null"
                ");",
                iprefs[j], i
            );
        }
    }

    // Memory events
    // The prefix list is currently empty ("wev" is commented out), so the
    // loop below creates no tables
    char *eprefs[] = { /* "wev" */ };
    int eprefs_cnt = sizeof(eprefs) / sizeof(eprefs[0]);

    for (int i = 0; i < CORES; ++i) {
        for (int j = 0; j < eprefs_cnt; ++j) {
            g_info("Creating '%s_%d' table in SQLite database", eprefs[j], i);
            salis_exec_sql(
                0, NULL, NULL,
                "create table %s_%d ("
#define FOR_CORE(i) "cycl_" #i " int not null, "
                FOR_CORES
#undef FOR_CORE
                "size int not null, "
                "evts blob not null ,"
                "step int not null"
                ");",
                eprefs[j], i
            );
        }
    }
}
#endif

// Pushes one row of arch-specific statistics to every arch table through
// salis_exec_sql(), then resets the per-core counters. Requires g_sim_data to
// be set.
//
// Rows pushed:
//   arch_general    - cycl, wmb0, wmb1 and wdea of every core, plus g_steps
//   pop_<c>         - number of cells of core c holding each opcode,
//                     counted here by scanning the core's whole memory vector
//   exe_<c>, wrt_<c> - the iexe / iwrt counters of core c
//
// The SQL text and the matching argument lists are assembled at compile time
// by expanding the FOR_CORES and INST_SET X-macro lists.
void arch_push_data_line() {
    assert(g_sim_data);

    // Measure instruction population
    uint64_t ipop[CORES][INST_COUNT] = { 0 };

    for (int i = 0; i < CORES; ++i) {
        struct Core *core = &g_cores[i];

        for (uint64_t j = 0; j < MVEC_SIZE; ++j) {
            ++ipop[i][_get_inst(core, j)];
        }

#if !defined(NDEBUG)
        // Every cell must have been counted exactly once
        uint64_t pop_tot = 0;

        for (int j = 0; j < INST_COUNT; ++j) {
            pop_tot += ipop[i][j];
        }

        assert(pop_tot == MVEC_SIZE);
#endif
    }

    g_info("Pushing row to 'arch_general' table in SQLite database");
    salis_exec_sql(
        0, NULL, NULL,
        "insert into arch_general ("
#define FOR_CORE(i) \
        "cycl_" #i ", " \
        "wmb0_" #i ", " \
        "wmb1_" #i ", " \
        "wdea_" #i ", "
        FOR_CORES
#undef FOR_CORE
        "step"
        ") values ("
#define FOR_CORE(i) "%ld,  %ld, %ld, %ld, "
        FOR_CORES
#undef FOR_CORE
        "%ld"
        ");",
#define FOR_CORE(i) \
        g_cores[i].cycl, \
        g_cores[i].wmb0, \
        g_cores[i].wmb1, \
        g_cores[i].wdea,
        FOR_CORES
#undef FOR_CORE
        g_steps
    );

    char *iprefs[] = { "pop", "exe", "wrt" };
    int iprefs_cnt = sizeof(iprefs) / sizeof(iprefs[0]);

    for (int i = 0; i < CORES; ++i) {
        for (int j = 0; j < iprefs_cnt; ++j) {
            // Select the counter array that belongs to this table prefix
            uint64_t *ia = NULL;

            if (!strcmp("pop", iprefs[j])) {
                ia = ipop[i];
            } else if (!strcmp("exe", iprefs[j])) {
                ia = g_cores[i].iexe;
            } else if (!strcmp("wrt", iprefs[j])) {
                ia = g_cores[i].iwrt;
            }

            g_info("Pushing row to '%s_%d' table in SQLite database", iprefs[j], i);
            salis_exec_sql(
                0, NULL, NULL,
                "insert into %s_%d ("
#define FOR_CORE(i) "cycl_" #i ", "
                FOR_CORES
#undef FOR_CORE
#define INST(index, label, mnemonic, symbol) #label ", "
                INST_SET
#undef INST
                "step"
                ") values ("
#define FOR_CORE(i) "%ld, "
                FOR_CORES
#undef FOR_CORE
#define INST(index, label, mnemonic, symbol) "%ld, "
                INST_SET
#undef INST
                "%ld"
                ");",
                iprefs[j],
                i,
#define FOR_CORE(i) g_cores[i].cycl,
                FOR_CORES
#undef FOR_CORE
#define INST(index, label, mnemonic, symbol) ia[index],
                INST_SET
#undef INST
                g_steps
            );
        }
    }

    // TODO: insert write memory events
    // The prefix list is currently empty ("wev" is commented out), so the
    // loop below does not run yet
    char *eprefs[] = { /* "wev" */ };
    int eprefs_cnt = sizeof(eprefs) / sizeof(eprefs[0]);

    for (int i = 0; i < CORES; ++i) {
        for (int j = 0; j < eprefs_cnt; ++j) {
            uint64_t *in = NULL;

            //if (!strcmp("wev", eprefs[j])) {
            //    in = g_cores[i].weva;
            //}

            // Compress event data
            // NOTE(review): the deflateInit()/deflate() results are not
            // checked, and the output buffer is only as large as the input
            // rather than deflateBound() - revisit before enabling "wev"
            size_t size = sizeof(uint64_t) * MVEC_SIZE;
            char *out = malloc(size);
            assert(out);

            z_stream strm = { 0 };
            strm.zalloc = NULL;
            strm.zfree = NULL;
            strm.opaque = NULL;

            deflateInit(&strm, Z_DEFAULT_COMPRESSION);

            strm.avail_in = size;
            strm.avail_out = size;
            strm.next_in = (Bytef *)in;
            strm.next_out = (Bytef *)out;

            deflate(&strm, Z_FINISH);

            // Insert blob
            const void *blob = out;
            int blob_size = strm.total_out;

            g_info("Pushing row to '%s_%d' table in SQLite database", eprefs[j], i);
            salis_exec_sql(
                1, &blob, &blob_size,
                "insert into %s_%d ("
#define FOR_CORE(i) "cycl_" #i ", "
                FOR_CORES
#undef FOR_CORE
                "size, evts, step"
                ") values ("
#define FOR_CORE(i) "%ld, "
                FOR_CORES
#undef FOR_CORE
                "%ld, ?, %ld"
                ");",
                eprefs[j], i,
#define FOR_CORE(i) g_cores[i].cycl,
                FOR_CORES
#undef FOR_CORE
                // blob_size is an int but is formatted with %ld; variadic
                // arguments must match their conversion, so widen it here
                (long)blob_size, g_steps
            );

            deflateEnd(&strm);
            free(out);
        }
    }

    // Reset arch-specific data aggregation fields
    for (int i = 0; i < CORES; ++i) {
        struct Core *core = &g_cores[i];

        memset(core->iexe, 0, sizeof(uint64_t) * INST_COUNT);
        memset(core->iwrt, 0, sizeof(uint64_t) * INST_COUNT);

        core->wmb0 = 0;
        core->wmb1 = 0;
        core->wdea = 0;

        //memset(core->weva, 0, sizeof(uint64_t) * MVEC_SIZE);
    }
}
#endif