diff --git a/software/apps/CMakeLists.txt b/software/apps/CMakeLists.txt
index 0c95dfb..5662d26 100644
--- a/software/apps/CMakeLists.txt
+++ b/software/apps/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory(bad_apple)
+add_subdirectory(colortext)
 add_subdirectory(colour_terminal)
 add_subdirectory(christmas_snowflakes)
 add_subdirectory(dht_logging)
diff --git a/software/apps/colortext/CMakeLists.txt b/software/apps/colortext/CMakeLists.txt
new file mode 100644
index 0000000..8c83603
--- /dev/null
+++ b/software/apps/colortext/CMakeLists.txt
@@ -0,0 +1,30 @@
+add_executable(colortext
+    main.c
+    font.h
+    gen.h
+    gen.c
+    scanout.h
+    scanout.S
+    text.h
+    text.c
+)
+
+# TODO this should work ok with DVI_N_TMDS_BUFFERS=2 (perhaps need to
+# rearrange some pushes/pops) and also as we are monochrome the buffers are 3x
+# as big as they need to be
+target_compile_definitions(colortext PRIVATE
+    DVI_DEFAULT_SERIAL_CONFIG=${DVI_DEFAULT_SERIAL_CONFIG}
+    DVI_VERTICAL_REPEAT=1
+    DVI_N_TMDS_BUFFERS=3
+    DVI_MONOCHROME_TMDS=0
+    )
+
+target_link_libraries(colortext
+    pico_stdlib
+    pico_multicore
+    pico_util
+    libdvi
+)
+
+# create map/bin/hex file etc.
+pico_add_extra_outputs(colortext)
diff --git a/software/apps/colortext/font.h b/software/apps/colortext/font.h
new file mode 100644
index 0000000..b880efc
--- /dev/null
+++ b/software/apps/colortext/font.h
@@ -0,0 +1,76 @@
+#define FONT_STRIDE 100
+#define FONT_HEIGHT 16
+static const uint16_t font_x_offsets[] = {
+    0, 4, 9, 16, 24, 32, 46, 59, 64, 69, 74, 82, 91, 95, 104, 108, 113,
+    121, 129, 137, 145, 153, 161, 169, 177, 185, 193, 197, 201, 210, 219,
+    228, 235, 250, 262, 273, 284, 296, 306, 315, 327, 339, 344, 350, 362,
+    372, 387, 399, 411, 420, 432, 443, 452, 462, 474, 486, 502, 514, 526,
+    536, 541, 546, 551, 559, 567, 572, 579, 587, 594, 602, 609, 615, 623,
+    631, 636, 640, 648, 653, 666, 674, 682, 690, 698, 704, 710, 715, 723,
+    731, 743, 751, 759, 766, 774, 777, 785
+};
+static const uint8_t font_widths[] = {
+    4, 5, 7, 8, 8, 14, 13, 5, 5, 5, 8, 9, 4, 9, 4, 5, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 4, 4, 9, 9, 9, 7, 15, 12, 11, 11, 12, 10, 9, 12, 12, 5, 6, 12,
+    10, 15, 12, 12, 9, 12, 11, 9, 10, 12, 12, 16, 12, 12, 10, 5, 5, 5, 8,
+    8, 5, 7, 8, 7, 8, 7, 6, 8, 8, 5, 4, 8, 5, 13, 8, 8, 8, 8, 6, 6, 5, 8,
+    8, 12, 8, 8, 7, 8, 3, 8, 9
+};
+static const uint32_t font_bits[] = {
+    0x8000000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3e482440,
+    0x200c0138, 0x2008, 0x10390000, 0xf8403838, 0x3818fee0, 0x0,
+    0x801f01c0, 0xf5f01f80, 0x83fcff03, 0x9c39c397, 0xc0701de7, 0xf8381c39,
+    0xe03f0701, 0x1ce1dff2, 0xc1dc39dc, 0xe7fdc1d, 0x200041c, 0x800010,
+    0x42010030, 0x404, 0x0, 0x0, 0x0, 0x64c, 0x49482440, 0x201201e4,
+    0xa824, 0x18450000, 0x8604444, 0x44248218, 0x0, 0x8060c220, 0x260c3101,
+    0x6208820c, 0x8108118, 0xc0600841, 0x10c60830, 0x104218c2, 0x8409113,
+    0x80881088, 0x22204808, 0x1000a10, 0xc00018, 0x42018048, 0x606, 0x0,
+    0x80, 0x0, 0x842, 0x49482440, 0x101201a2, 0x7042, 0x14450000,
+    0x8504282, 0x82424204, 0x30018000, 0x40802220, 0x24042101, 0x10080210,
+    0x8108110, 0xa0a00821, 0x10820850, 0x8821044, 0x10409112, 0x41042104,
+    0x22104410, 0x1000a10, 0x800010, 0x10008, 0x404, 0x0, 0x80, 0x0, 0x842,
+    0x9fe0040, 0x1200a2, 0x407042, 0x10828000, 0x4502080, 0x82424004,
+    0xc0006044, 0x40ac1200, 0x24022103, 0x8080220, 0x8108100, 0xa0a00811,
+    0x11010850, 0x8822024, 0x10408100, 0x22042104, 0x42080220, 0xc0001110,
+    0x70b0e1d1, 0x421d3608, 0xe8668474, 0x6970e870, 0xbf3b1bde, 0x1ff7bbf3,
+    0x842, 0xa240040, 0x1cc00d2, 0x40a881, 0x10828000, 0x3c481080,
+    0x82242032, 0x1044, 0x21331101, 0x20021102, 0x9084220, 0x8108100,
+    0x91200809, 0x11010890, 0x30422022, 0x20408100, 0x14046202, 0x42080220,
+    0x20001110, 0x88c91132, 0x6333193c, 0x9c99c424, 0x5d999c89, 0x12121091,
+    0x10a11121, 0x842, 0x1c240040, 0x870e4c, 0x402081, 0x10824000,
+    0x40443840, 0xc418204e, 0x1fc0c00, 0x21110886, 0x20021f06, 0x9f87e20,
+    0x810ff3c, 0x91200807, 0xf1010890, 0xc03e2021, 0x20408101, 0x8025202,
+    0x82040140, 0x20002090, 0x84850a12, 0x42211088, 0x8888414, 0x9090905,
+    0x12121081, 0x8210a21, 0x11c0842, 0x287f0040, 0x488960, 0x83f80081,
+    0x1082403f, 0x80444020, 0xb8241082, 0x200, 0x11108888, 0x20023104,
+    0x9084220, 0x8108110, 0x91200809, 0x11010910, 0x122020, 0x20408102,
+    0x14025202, 0x82020080, 0x80002090, 0xfc840a13, 0x42211088, 0x888840c,
+    0x9090905, 0x2122108e, 0x4120412, 0x1321041, 0x48120000, 0x3048a0,
+    0x400081, 0x10824000, 0x80428010, 0x40421082, 0xc00, 0xf1108806,
+    0x20024107, 0x8080220, 0x8108110, 0x8a200811, 0x11010a10, 0x222020,
+    0x40408104, 0x22029401, 0x2010080, 0x60000011, 0x4840a12, 0x42210908,
+    0x8888414, 0x9090905, 0x21221090, 0x2120a12, 0xe20842, 0x49120000,
+    0x21048b0, 0x400081, 0x10442000, 0x80fe8008, 0x40421082, 0x1fc1000,
+    0x8988801, 0x2404410c, 0x10080210, 0x9108110, 0x8a200821, 0x10820a10,
+    0x8421040, 0x40408104, 0x41029401, 0x2010080, 0x20000011, 0x4840a12,
+    0x42210708, 0x8888424, 0x9090905, 0x41421091, 0x1140a15, 0x842,
+    0x49120040, 0x1288490, 0x20400081, 0x10442200, 0x62404684, 0x30240844,
+    0xc0006044, 0x8948880, 0x230c6108, 0x6009020c, 0x9108118, 0x84220841,
+    0x10c60c10, 0x188218c0, 0x80210102, 0x80810800, 0x2408080, 0x20000012,
+    0x89c91133, 0x42210088, 0x8888444, 0x9991889, 0x80c71293, 0x10881108,
+    0x842, 0x3e120040, 0xc70310, 0x20000042, 0x38382200, 0x1c403c7e,
+    0xe180838, 0x30018044, 0x1c631080, 0xf0f03fbc, 0x801cff03, 0x8639c387,
+    0xc471fde7, 0x38380879, 0xeb870780, 0x801e0381, 0xc1c10800, 0x23fc1c1,
+    0xc0000012, 0x70b0e0e6, 0x47739f3c, 0x9dddceee, 0x1d70e873, 0x8082e10d,
+    0x1f883b88, 0x842, 0x8000000, 0x0, 0x10000042, 0x1000, 0x0, 0x0, 0x20,
+    0x2000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x600, 0x0, 0x0, 0x2000000, 0x10, 0x0,
+    0x40002080, 0x0, 0x1000800, 0x0, 0x40000, 0x802, 0x0, 0x0, 0x24,
+    0x1000, 0x0, 0x0, 0x0, 0x60c000, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc00, 0x0,
+    0x0, 0x2000000, 0x7f8010, 0x0, 0x40002080, 0x0, 0x1000800, 0x0,
+    0x40000, 0x802, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0, 0x1f0000, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x7000, 0x0, 0x0, 0xe000000, 0x1c, 0x0, 0x50001180, 0x0,
+    0x1000800, 0x0, 0x28000, 0x60c, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x30000f00, 0x0,
+    0x3801c00, 0x0, 0x18000, 0x0
+};
diff --git a/software/apps/colortext/gen.c b/software/apps/colortext/gen.c
new file mode 100644
index 0000000..83669f1
--- /dev/null
+++ b/software/apps/colortext/gen.c
@@ -0,0 +1,163 @@
+#include <stdint.h> // NOTE(review): header name was lost in extraction; chosen for uint32_t/uint8_t - confirm
+#include "dvi.h"
+#include "gen.h"
+
+uint32_t expand_1_to_4_lut[256]; // byte of 1bpp pixels -> word of 4bpp pixels (each set bit becomes 0xf)
+
+void init_expand_lut() {
+    for (uint i = 0; i < 256; i++) {
+        uint32_t out = 0;
+        for (uint j = 0; j < 8; j++) {
+            if (i & (1 << j)) {
+                out |= 15u << (j * 4); // unsigned: 15 << 28 does not fit in a signed int
+            }
+        }
+        expand_1_to_4_lut[i] = out;
+    }
+}
+
+// n is number of bytes of input (number of pixels / 8)
+void __not_in_flash("main") expand_1_to_4(uint8_t *src, uint32_t *dst, uint n) {
+    for (unsigned int i = 0; i < n; i++) {
+        *dst++ = expand_1_to_4_lut[*src++];
+    }
+}
+
+void __not_in_flash("main") apply_colors(uint32_t *src, uint32_t *dst, uint32_t fg, uint32_t bg, uint n) {
+    fg *= 0x11111111; // replicate the 4-bit colour into all eight nibbles
+    bg *= 0x11111111;
+    uint32_t mask = fg ^ bg;
+    for (uint i = 0; i < n; i++) {
+        *dst++ = (*src++ & mask) ^ bg; // nibble 0xf -> fg, nibble 0x0 -> bg
+    }
+}
+
+void __not_in_flash("main") process_scanline(const Run *runs, uint32_t y, uint32_t *outp) {
+    uint32_t outw = 0; // output word being assembled
+    uint32_t outbitix = 0; // number of valid low bits in outw
+    for (;;) {
+        const uint32_t *inp = runs->input;
+        if (inp == 0) {
+            break; // sentinel run ends the list
+        }
+        inp = (const uint32_t *)((const uint8_t *)inp + y * runs->stride);
+        int count = runs->count;
+        int inbitix = runs->bit_offset;
+        runs++;
+        uint32_t inw = *inp++;
+        outw |= (inw >> inbitix) << outbitix;
+        if (inbitix > outbitix) {
+            count -= 32 - inbitix;
+            outbitix += 32 - inbitix;
+            if (count <= 0) {
+                outbitix += count;
+                outw = (outw << (32 - outbitix)) >> (32 - outbitix);
+                continue;
+            }
+        } else {
+            count -= 32 - outbitix;
+            if (count >= 0) {
+                *outp++ = outw;
+                if (count == 0) {
+                    outw = 0;
+                    outbitix = 0;
+                    continue;
+                }
+                outbitix -= inbitix;
+                if (outbitix != 0) {
+                    outw = inw >> (32 - outbitix);
+                    count -= outbitix;
+                    if (count <= 0) {
+                        outbitix += count;
+                        outw = (outw << (32 - outbitix)) >> (32 - outbitix);
+                        continue;
+                    }
+                } else {
+                    outw = 0;
+                }
+            } else {
+                outbitix = 32 + count;
+                outw = (outw << (-count)) >> (-count);
+                continue;
+            }
+        }
+        if (outbitix == 0) {
+            if ((count & 31) == 0) {
+                for (;;) {
+                    *outp++ = *inp++;
+                    count -= 32;
+                    if (count == 0) {
+                        break;
+                    }
+                }
+                outw = 0;
+            } else {
+                for (;;) {
+                    outw = *inp++;
+                    if (count < 32) {
+                        outbitix = count;
+                        outw = (outw << (32 - outbitix)) >> (32 - outbitix);
+                        break;
+                    }
+                    count -= 32;
+                    *outp++ = outw;
+                }
+            }
+        } else {
+            uint32_t frac = (count & 31) + outbitix;
+            if (frac < 32) {
+                for (;;) {
+                    inw = *inp++;
+                    outw |= inw << outbitix;
+                    if (count < 32) {
+                        outbitix += count;
+                        outw = (outw << (32 - outbitix)) >> (32 - outbitix);
+                        break;
+                    }
+                    count -= 32;
+                    *outp++ = outw;
+                    outw = inw >> (32 - outbitix);
+                }
+            } else if (frac == 32) {
+                for (;;) {
+                    inw = *inp++;
+                    outw |= inw << outbitix;
+                    *outp++ = outw;
+                    count -= 32;
+                    if (count < 0) {
+                        break;
+                    }
+                    outw = inw >> (32 - outbitix);
+                }
+                outw = 0;
+                outbitix = 0;
+            } else {
+                for (;;) {
+                    inw = *inp++;
+                    outw |= inw << outbitix;
+                    *outp++ = outw;
+                    outw = inw >> (32 - outbitix);
+                    count -= 32;
+                    if (count <= 0) {
+                        outbitix += count;
+                        outw = (outw << (32 - outbitix)) >> (32 - outbitix);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+extern const uint32_t tmds_table_4bpp[];
+
+void __not_in_flash("main") make_1bpp_pal(uint32_t pal[16], uint bg, uint fg) {
+    for (uint chan = 0; chan < 3; chan++) {
+        uint bg_g = (bg >> (chan * 4)) & 0xf; // 4-bit component of this channel
+        uint fg_g = (fg >> (chan * 4)) & 0xf;
+        pal[chan] = tmds_table_4bpp[bg_g * 0x11];
+        pal[chan + 4] = tmds_table_4bpp[bg_g * 0x10 + fg_g];
+        pal[chan + 8] = tmds_table_4bpp[fg_g * 0x10 + bg_g];
+        pal[chan + 12] = tmds_table_4bpp[fg_g * 0x11];
+    }
+}
diff --git a/software/apps/colortext/gen.h b/software/apps/colortext/gen.h
new file mode 100644
index 0000000..6cd9b64
--- /dev/null
+++ b/software/apps/colortext/gen.h
@@ -0,0 +1,16 @@
+void init_expand_lut();
+void expand_1_to_4(uint8_t *src, uint32_t *dst, uint n);
+void apply_colors(uint32_t *src, uint32_t *dst, uint32_t fg, uint32_t bg, uint n);
+
+void make_1bpp_pal(uint32_t pal[16], uint bg, uint fg);
+
+typedef struct Run {
+    // NULL for sentinel
+    const uint32_t *input;
+    // stride in bytes (must be multiple of 4)
+    uint32_t stride;
+    uint16_t bit_offset;
+    uint16_t count;
+} Run;
+
+void process_scanline(const Run *runs, uint32_t y, uint32_t *outp);
diff --git a/software/apps/colortext/main.c b/software/apps/colortext/main.c
new file mode 100644
index 0000000..870a990
--- /dev/null
+++ b/software/apps/colortext/main.c
@@ -0,0 +1,432 @@
+#include <stdio.h> // NOTE(review): the three system header names were lost in extraction; restored by usage - confirm
+#include <stdlib.h>
+#include <string.h>
+#include "pico/stdlib.h"
+#include "pico/multicore.h"
+#include "hardware/clocks.h"
+#include "hardware/irq.h"
+#include "hardware/sync.h"
+#include "hardware/gpio.h"
+#include "hardware/vreg.h"
+#include "hardware/structs/bus_ctrl.h"
+#include "hardware/structs/ssi.h"
+#include "hardware/dma.h"
+#include "pico/sem.h"
+
+#include "dvi.h"
+#include "dvi_serialiser.h"
+#include "common_dvi_pin_configs.h"
+#include "tmds_encode.h"
+
+#include "gen.h"
+#include "scanout.h"
+#include "text.h"
+
+#include "font_8x8.h"
+#define FONT_CHAR_WIDTH 8
+#define FONT_CHAR_HEIGHT 8
+#define FONT_N_CHARS 95
+#define FONT_FIRST_ASCII 32
+
+#include "font.h"
+
+
+// Pick one:
+#define MODE_640x480_60Hz
+// #define MODE_800x600_60Hz
+// #define MODE_960x540p_60Hz
+// #define MODE_1280x720_30Hz
+
+#if defined(MODE_640x480_60Hz)
+// DVDD 1.2V (1.1V seems ok too)
+#define FRAME_WIDTH 640
+#define FRAME_HEIGHT 480
+#define VREG_VSEL VREG_VOLTAGE_1_20
+#define DVI_TIMING dvi_timing_640x480p_60hz
+
+#elif defined(MODE_800x600_60Hz)
+// DVDD 1.3V, going downhill with a tailwind
+#define FRAME_WIDTH 800
+#define FRAME_HEIGHT 600
+#define VREG_VSEL VREG_VOLTAGE_1_30
+#define DVI_TIMING dvi_timing_800x600p_60hz
+
+
+#elif defined(MODE_960x540p_60Hz)
+// DVDD 1.25V (slower silicon may need the full 1.3, or just not work)
+#define FRAME_WIDTH 960
+#define FRAME_HEIGHT 540
+#define VREG_VSEL VREG_VOLTAGE_1_25
+#define DVI_TIMING dvi_timing_960x540p_60hz
+
+#elif defined(MODE_1280x720_30Hz)
+// 1280x720p 30 Hz (nonstandard)
+// DVDD 1.25V (slower silicon may need the full 1.3, or just not work)
+#define FRAME_WIDTH 1280
+#define FRAME_HEIGHT 720
+#define VREG_VSEL VREG_VOLTAGE_1_25
+#define DVI_TIMING dvi_timing_1280x720p_30hz
+
+#else
+#error "Select a video mode!"
+#endif
+
+#define LED_PIN PICO_DEFAULT_LED_PIN
+
+struct dvi_inst dvi0;
+struct semaphore dvi_start_sem;
+
+#define CHAR_COLS (FRAME_WIDTH / FONT_CHAR_WIDTH)
+#define CHAR_ROWS (FRAME_HEIGHT / FONT_CHAR_HEIGHT)
+char charbuf[CHAR_ROWS * CHAR_COLS];
+
+#if DVI_MONOCHROME_TMDS
+#define N_CHAN 1
+#else
+#define N_CHAN 3
+#endif
+#define N_PIX_BUFFERS 5
+
+static inline void font_scanline(uint8_t *scanbuf, const char *chars, uint y) {
+    memcpy(scanbuf, font_bits + (y % FONT_HEIGHT) * (FONT_STRIDE / 4), FRAME_WIDTH / 8); // chars is unused
+}
+
+// Info needed to create one scanline of TMDS
+struct scan_display_list {
+    uint32_t bwbuf[FRAME_WIDTH / 8]; // 1bpp bitmap input for the scan ops
+    uint32_t scanlist[16]; // op pointer + arguments, terminated by tmds_scan_stop
+};
+
+struct scan_display_list display_lists[N_PIX_BUFFERS];
+
+uint32_t global_pal[16];
+uint32_t global_pal2[16];
+
+struct render_slot {
+    uint cpu_assigned; // 0 or 1 = core rendering this slot; ~0 = request dropped / underrun
+    uint complete; // set to 1 by the rendering core, cleared by the scanline callback
+    uint y;
+    struct scan_display_list *dlist;
+};
+
+struct render_state {
+    struct render_slot slots[N_PIX_BUFFERS];
+    uint slot_ix; // next slot the scanline callback will consume
+    uint y; // next scanline to request
+    queue_t q_core1_req; // slot indices for core 1 to render
+};
+
+struct render_state global_render_state;
+
+struct scanlist sl;
+
+// Encode one scanline from display list to TMDS
+// Runs on core 1 irq, could be in scratch
+void __not_in_flash("main") encode_scanline(uint32_t *tmdsbuf, struct scan_display_list *dl, uint y) {
+    tmds_scan(dl->scanlist, dl->bwbuf, tmdsbuf, FRAME_WIDTH * 2); // stride = bytes per lane (FRAME_WIDTH / 2 words)
+}
+
+// https://iamkate.com/data/12-bit-rainbow/
+static uint16_t palette[12] = {
+    0x817,
+    0xa35,
+    0xc66,
+    0xe94,
+    0xed0,
+    0x9d5,
+    0x4d8,
+    0x2cb,
+    0x0bc,
+    0x09c,
+    0x36b,
+    0x639,
+};
+
+#define LEADING 2
+
+struct text_line textlines[64];
+uint n_textlines = 0;
+
+// Render one scanline to a display list.
+// Runs in the main loop of either core (core 0 via the inter-core FIFO, core 1 via q_core1_req)
+struct scan_display_list * __not_in_flash("main") render_scanline(struct render_slot *slot, uint slot_ix) {
+    uint y = slot->y;
+    struct scan_display_list *dlist = &display_lists[slot_ix];
+    uint this_y = y % (FONT_HEIGHT + LEADING);
+    uint line_num = y / (FONT_HEIGHT + LEADING);
+    uint32_t *scan = dlist->scanlist;
+    if (this_y < FONT_HEIGHT && line_num < n_textlines) {
+        uint width = textlines[line_num].width;
+        render_text_scanline(dlist->bwbuf, textlines[line_num].runs, this_y);
+        uint j = 0;
+        if (width > 0) {
+            scan[j++] = (uint32_t)tmds_scan_1bpp_pal;
+            scan[j++] = width / 2; // counts are in 2-pixel units
+            scan[j++] = (uint32_t)textlines[line_num].palette;
+        }
+        if (width < FRAME_WIDTH) {
+            scan[j++] = (uint32_t)tmds_scan_solid;
+            scan[j++] = (FRAME_WIDTH - width) / 2;
+            scan[j++] = 0;
+        }
+        scan[j] = (uint32_t)tmds_scan_stop;
+    } else {
+        scan[0] = (uint32_t)tmds_scan_solid_tmds;
+        scan[1] = FRAME_WIDTH / 2;
+        scan[2] = 0x7fd00u;
+        scan[3] = 0x7fd00u;
+        scan[4] = 0x7fd00u;
+        scan[5] = (uint32_t)tmds_scan_stop;
+    }
+    return dlist;
+}
+
+static uint32_t pixline[N_CHAN * FRAME_WIDTH / 8];
+
+void prepare_pixline(void) {
+    for (uint chan = 0; chan < N_CHAN; chan++) {
+        uint32_t *out = pixline + chan * (FRAME_WIDTH / 8);
+        for (uint i = 0; i < FRAME_WIDTH / 8; i++) {
+            uint32_t g = (i & 15) * 0x11111111;
+            if (i >= 4 && i < 16) {
+                g = ((palette[i - 4] >> (4 * chan)) & 15) * 0x11111111u; // unsigned: avoids signed overflow
+            }
+            out[i] = g;
+        }
+        out[0] = 0x76543210;
+        out[1] = 0xfedcba98;
+        out[2] = 0x89abcdef;
+        out[3] = 0x01234567;
+    }
+}
+
+static uint late_scanline_count = 0;
+
+static inline void tmds_encode_scanline(void) {
+    uint32_t *pixbuf;
+    uint32_t *tmdsbuf;
+    while (late_scanline_count > 0 && queue_try_remove(&dvi0.q_colour_valid, &pixbuf)) {
+        queue_add_blocking(&dvi0.q_colour_free, &pixbuf);
+        late_scanline_count--;
+    }
+    if (!queue_try_remove(&dvi0.q_colour_valid, &pixbuf)) {
+        pixbuf = NULL;
+        late_scanline_count++;
+    }
+    queue_remove_blocking(&dvi0.q_tmds_free, &tmdsbuf);
+    for (uint chan = 0; chan < N_CHAN; chan++) {
+        uint32_t *out = tmdsbuf + chan * (FRAME_WIDTH / 2);
+        if (pixbuf != NULL) {
+            tmds_encode_4bpp(pixbuf + chan * (FRAME_WIDTH / 8), out, FRAME_WIDTH);
+        } else {
+            uint32_t word = 0x7fd00;
+            for (uint j = 0; j < FRAME_WIDTH / 2; j++) {
+                out[j] = word;
+            }
+        }
+    }
+    queue_add_blocking(&dvi0.q_tmds_valid, &tmdsbuf);
+    if (pixbuf) {
+        queue_add_blocking(&dvi0.q_colour_free, &pixbuf);
+    }
+}
+
+void dummy_tmds() {
+    uint32_t *tmdsbuf;
+    queue_remove_blocking_u32(&dvi0.q_tmds_free, &tmdsbuf);
+    for (uint chan = 0; chan < N_TMDS_LANES; chan++) {
+        uint32_t tmds = chan == 1 ? 0xbfe00 : 0x7fd00;
+        uint32_t *out = tmdsbuf + chan * FRAME_WIDTH / 2;
+        for (uint i = 0; i < FRAME_WIDTH / 2; i++) {
+            out[i] = tmds;
+        }
+    }
+    queue_add_blocking_u32(&dvi0.q_tmds_valid, &tmdsbuf);
+}
+
+void __not_in_flash("main") core1_scanline_callback() {
+    uint32_t *tmdsbuf;
+    queue_remove_blocking_u32(&dvi0.q_tmds_free, &tmdsbuf);
+    uint slot_ix = global_render_state.slot_ix;
+    struct render_slot *slot = &global_render_state.slots[slot_ix];
+    // Access to slot->complete is conceptually acquire load
+    if (slot->cpu_assigned < 0x80000000 && slot->complete) {
+        encode_scanline(tmdsbuf, slot->dlist, slot->y);
+    } else {
+        // underrun
+        slot->cpu_assigned = ~0;
+        for (uint chan = 0; chan < N_TMDS_LANES; chan++) {
+            uint32_t tmds = chan == 1 ? 0x7fd00 : 0xbfe00;
+            uint32_t *out = tmdsbuf + chan * FRAME_WIDTH / 2;
+            for (uint i = 0; i < FRAME_WIDTH / 2; i++) {
+                out[i] = tmds;
+            }
+        }
+    }
+    queue_add_blocking_u32(&dvi0.q_tmds_valid, &tmdsbuf);
+
+    // Choose a CPU
+    uint n_in_flight = 0;
+    for (uint i = 1; i < N_PIX_BUFFERS; i++) {
+        struct render_slot *slot = &global_render_state.slots[(slot_ix + i) % N_PIX_BUFFERS];
+        if (!slot->complete && slot->cpu_assigned == 1) {
+            n_in_flight++;
+        }
+    }
+
+    // This choice tries to keep core 1 busy, but may require excessive
+    // buffering if close to maximum capacity. Maybe tune.
+    uint cpu = n_in_flight >= 2 ? 0 : 1;
+    slot->complete = 0;
+    slot->cpu_assigned = cpu;
+    slot->y = global_render_state.y;
+    if (cpu == 0) {
+        if (multicore_fifo_wready()) {
+            // could just write to hw register directly
+            multicore_fifo_push_blocking(slot_ix);
+        } else {
+            slot->cpu_assigned = ~0;
+        }
+    }
+    if (cpu == 1) {
+        if (!queue_try_add_u32(&global_render_state.q_core1_req, &slot_ix)) {
+            slot->cpu_assigned = ~0;
+        }
+    }
+    global_render_state.slot_ix = (slot_ix + 1) % N_PIX_BUFFERS;
+    global_render_state.y++;
+    if (global_render_state.y == FRAME_HEIGHT) {
+        global_render_state.y = 0;
+        gpio_xor_mask(1u << LED_PIN);
+    }
+}
+
+void __not_in_flash("main") core1_main() {
+    dvi_register_irqs_this_core(&dvi0, DMA_IRQ_0);
+    sem_acquire_blocking(&dvi_start_sem);
+    dvi_start(&dvi0);
+
+    while (1) {
+        uint slot_ix;
+        queue_remove_blocking_u32(&global_render_state.q_core1_req, &slot_ix);
+        struct render_slot *slot = &global_render_state.slots[slot_ix];
+        slot->dlist = render_scanline(slot, slot_ix);
+        slot->complete = 1;
+    }
+
+    __builtin_unreachable();
+}
+
+uint8_t bwbuf[FRAME_WIDTH / 8];
+uint32_t graybuf[FRAME_WIDTH / 2];
+
+uint delay(uint n) {
+    uint x = 0;
+    for (uint i = 0; i < n; i++) {
+        x = (1 << x);
+    }
+    return x >> 31;
+}
+
+char *text[] = {
+    "This is a demo of text rendering on a Pico board. The current prototype is written in C, but",
+    "I am hoping to redo it in Rust. All pixels are generated by racing the beam; it is not using",
+    "a frame buffer. Rendering is structured as a pipeline with a number of stages. The application",
+    "generates a display list with references to each character, about 16 bytes per character. The",
+    "middle stage renders this display list to a bitmap buffer. The last stage encodes this bitmap",
+    "buffer to a TMDS signal using a palette.",
+    "",
+    "Right now it's a fairly minimal setup to generate text, and it could be replicated just by",
+    "having a 1bpp frame buffer (which would be 38k for VGA), but the architecture is intended to",
+    "scale quite a bit. A scanline can contain multiple runs, and each run can have its own palette,",
+    "or even be a different graphics type. This is currently used to optimize solid colors. I'm",
+    "pretty sure that 4bpp gray or palette is possible (which should enable antialiased text), and",
+    "12bpp RGB is probably viable if it's only a fraction of the scanline.",
+    "",
+    "There are a number of interesting things about the architecture. The rendering stages are",
+    "virtual machines, and implemented in direct threaded style. Currently the middle stage is in",
+    "C, but ultimately it will be assembler.",
+    "",
+    "The architecture based on a display list can obviously support smooth scrolling, which is a",
+    "hallmark of beam-racing designs as opposed to frame buffers. I also think it could support",
+    "overlapping windows.",
+    "",
+    "We'll see where this goes, but for the time being, I'm just having lots of fun playing with it.",
+    NULL,
+};
+
+int __not_in_flash("main") main() {
+    vreg_set_voltage(VREG_VSEL);
+    sleep_ms(10);
+#ifdef RUN_FROM_CRYSTAL
+    set_sys_clock_khz(12000, true);
+#else
+    // Run system at TMDS bit clock
+    set_sys_clock_khz(DVI_TIMING.bit_clk_khz, true);
+#endif
+
+    setup_default_uart();
+
+    gpio_init(LED_PIN);
+    gpio_set_dir(LED_PIN, GPIO_OUT);
+
+    dvi0.timing = &DVI_TIMING;
+    dvi0.ser_cfg = DVI_DEFAULT_SERIAL_CONFIG;
+    dvi0.scanline_callback = core1_scanline_callback;
+    uint color_spinlock = next_striped_spin_lock_num();
+    dvi_init(&dvi0, next_striped_spin_lock_num(), color_spinlock);
+    global_render_state.slot_ix = 0;
+    global_render_state.y = 0;
+    for (int i = 0; i < N_PIX_BUFFERS; i++) {
+        global_render_state.slots[i].cpu_assigned = 0;
+        global_render_state.slots[i].complete = 0;
+    }
+    queue_init_with_spinlock(&global_render_state.q_core1_req, sizeof(void*), 8, color_spinlock);
+
+#if 0
+    for (int i = 0; i < CHAR_ROWS * CHAR_COLS; ++i)
+        charbuf[i] = FONT_FIRST_ASCII + i % FONT_N_CHARS;
+    prepare_pixline();
+    init_text_runs("Hello world. I am worried about running out of bandwidth. How close am I to going over the edge?");
+    init_expand_lut();
+    generate_scanline(0);
+    tmds_encode_scanline();
+#endif
+
+    sem_init(&dvi_start_sem, 0, 1);
+    hw_set_bits(&bus_ctrl_hw->priority, BUSCTRL_BUS_PRIORITY_PROC1_BITS);
+
+    for (uint i = 0;; i++) {
+        if (text[i] == NULL) {
+            n_textlines = i;
+            break;
+        }
+        setup_text_line(&textlines[i], text[i], FRAME_WIDTH);
+        uint32_t fg = palette[i % 12];
+        make_1bpp_pal(textlines[i].palette, 0, fg);
+    }
+    make_1bpp_pal(global_pal, 0, 0x4ba);
+    make_1bpp_pal(global_pal2, 0x321, 0x4ba);
+
+    for (uint i = 0; i < N_PIX_BUFFERS; i++) {
+        struct render_slot *slot = &global_render_state.slots[i]; // prime every slot, not only slot 1
+        slot->cpu_assigned = 0;
+        slot->y = i;
+        slot->dlist = render_scanline(slot, i);
+        slot->complete = 1;
+    }
+    global_render_state.y = N_PIX_BUFFERS;
+    core1_scanline_callback();
+    multicore_launch_core1(core1_main);
+
+    sem_release(&dvi_start_sem);
+    while (1) {
+        // TODO: set up interrupt so we can have a main task. This is
+        // why we use multicore rather than queue.
+        uint slot_ix = multicore_fifo_pop_blocking();
+        struct render_slot *slot = &global_render_state.slots[slot_ix];
+        slot->dlist = render_scanline(slot, slot_ix);
+        slot->complete = 1;
+    }
+    __builtin_unreachable();
+}
diff --git a/software/apps/colortext/scanout.S b/software/apps/colortext/scanout.S
new file mode 100644
index 0000000..b988c63
--- /dev/null
+++ b/software/apps/colortext/scanout.S
@@ -0,0 +1,453 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.macro decl_func_x name
+.section .scratch_x.\name, "ax"
+.global \name
+.type \name,%function
+.thumb_func
+\name:
+.endm
+
+#define decl_func decl_func_x
+
+decl_func fast_scanout
+    push {r4, r5, r6, r7}
+    mov r4, r8
+    mov r5, r9
+    push {r4, r5}
+scanout_continue:
+    ldmia r0!, {r4}
+    uxth r5, r4
+    adr r6, op_table
+    lsrs r4, #14
+    ldr r4, [r4, r6]
+    bx r4
+
+.align 4
+op_table:
+    .4byte op_stop
+    .4byte op_solid
+    .4byte op_solid_gray
+    .4byte op_1bpp_pal
+
+.type op_stop, %function
+op_stop:
+    pop {r4, r5}
+    mov r8, r4
+    mov r9, r5
+    pop {r4, r5, r6, r7}
+    bx lr
+
+.type op_solid, %function
+op_solid:
+    lsls r5, #2
+    adds r5, r2
+    mov ip, r5
+    ldmia r0!, {r4}
+    ldr r6, =(tmds_table_4bpp)
+    movs r7, #0xf
+    mov ip, r4
+    ands r4, r7
+    movs r7, #0x44
+    muls r4, r4, r7
+    ldr r7, [r6, r4]
+    // TODO finish
+    b op_stop
+
+.type op_solid_gray, %function
+op_solid_gray:
+    lsls r5, #2
+    adds r5, r2
+    ldmia r0!, {r4}
+    movs r7, #0x44
+    muls r4, r4, r7
+    ldr r6, =(tmds_table_4bpp)
+    ldr r4, [r6, r4]
+1:
+    str r4, [r2, r3]
+    adds r6, r2, r3
+    str r4, [r6, r3]
+    stmia r2!, {r4}
+2:
+    cmp r2, r5
+    bne 1b
+    b op_stop
+
+.macro tmds_encode_1bpp_pal_body shift_instr shamt
+    \shift_instr r5, r4, #\shamt
+    ands r5, r0 // r0 = mask, equals 0x30
+    add r5, r8 // r8 = pal
+    ldm r5, {r5, r6, r7}
+    str r6, [r2, r3] // r3 = stride
+    adds r6, r2, r3
+    str r7, [r6, r3]
+    stmia r2!, {r5}
+.endm
+
+.type op_1bpp_pal, %function
+op_1bpp_pal:
+    lsls r5, #2
+    adds r5, r2
+    mov ip, r5
+    // TODO: deal with fractions
+    ldmia r0!, {r4}
+    mov r8, r4
+    mov r9, r0
+    movs r0, #0x30
+1:
+    ldmia r1!, {r4}
+    tmds_encode_1bpp_pal_body lsls 4
+    tmds_encode_1bpp_pal_body lsls 2
+    tmds_encode_1bpp_pal_body lsls 0
+    tmds_encode_1bpp_pal_body lsrs 2
+    tmds_encode_1bpp_pal_body lsrs 4
+    tmds_encode_1bpp_pal_body lsrs 6
+    tmds_encode_1bpp_pal_body lsrs 8
+    tmds_encode_1bpp_pal_body lsrs 10
+    tmds_encode_1bpp_pal_body lsrs 12
+    tmds_encode_1bpp_pal_body lsrs 14
+    tmds_encode_1bpp_pal_body lsrs 16
+    tmds_encode_1bpp_pal_body lsrs 18
+    tmds_encode_1bpp_pal_body lsrs 20
+    tmds_encode_1bpp_pal_body lsrs 22
+    tmds_encode_1bpp_pal_body lsrs 24
+    tmds_encode_1bpp_pal_body lsrs 26
+    cmp r2, ip
+    beq 2f
+    b 1b
+2:
+    mov r0, r9
+    b op_stop
+
+// r0: scan list in direct threaded format
+// r1: input buffer
+// r2: output buffer
+// r3: stride (of output buffer)
+decl_func tmds_scan
+    push {r4, r5, r6, r7}
+    mov r4, r8
+    mov r5, r9
+    mov r6, r10
+    push {r4, r5, r6}
+
+    // operation, 2x args
+    // should count be single pixels or double?
+    ldmia r0!, {r4, r5, r6}
+    bx r4
+
+decl_func tmds_scan_stop
+    pop {r4, r5, r6}
+    mov r8, r4
+    mov r9, r5
+    mov r10, r6
+    pop {r4, r5, r6, r7}
+    bx lr
+
+// args: count rgb12
+decl_func tmds_scan_solid
+    mov r8, r1
+    lsls r5, #2
+    adds r4, r2, r5
+    // ip is actual end of output
+    mov ip, r4
+    lsls r5, #28
+    lsrs r5, #28
+    adds r4, r2, r5
+    // r10 is end of fractional part (may be == r2)
+    mov r10, r4
+    mov r4, r6
+    lsls r4, #28
+    lsrs r4, #28
+    movs r7, #0x44
+    muls r4, r4, r7
+    ldr r1, =(tmds_table_4bpp)
+    ldr r4, [r1, r4]
+    adds r7, r2, r3 // beginning of green row
+    cmp r2, r10
+    beq 2f
+1:
+    stmia r2!, {r4}
+    cmp r2, r10
+    bne 1b
+    cmp r2, ip
+    beq 4f
+2:
+    mov r5, r4
+3:
+    stmia r2!, {r4, r5}
+    stmia r2!, {r4, r5}
+    cmp r2, ip
+    bne 3b
+4:
+
+    add ip, r3
+    add r10, r3
+    lsls r4, r6, #24
+    lsrs r4, #28
+    // load green pixel from palette
+    movs r5, #0x44
+    muls r4, r4, r5
+    ldr r4, [r1, r4]
+    // load red pixel from palette
+    lsrs r6, #8
+    muls r6, r5
+    ldr r6, [r1, r6]
+    adds r1, r7, r3 // beginning of red row
+    cmp r7, r10
+    beq 2f
+1:
+    stmia r7!, {r4}
+    cmp r7, r10
+    bne 1b
+    cmp r7, ip
+    beq 4f
+2:
+    mov r5, r4
+3:
+    stmia r7!, {r4, r5}
+    stmia r7!, {r4, r5}
+    cmp r7, ip
+    bne 3b
+4:
+    // write red
+    add ip, r3
+    add r10, r3
+    cmp r1, r10
+    beq 2f
+1:
+    stmia r1!, {r6}
+    cmp r1, r10
+    bne 1b
+    cmp r1, ip
+    beq 4f
+2:
+    mov r7, r6
+3:
+    stmia r1!, {r6, r7}
+    stmia r1!, {r6, r7}
+    cmp r1, ip
+    bne 3b
+4:
+    mov r1, r8
+    ldmia r0!, {r4, r5, r6}
+    bx r4
+
+// args: count gray4
+decl_func tmds_scan_solid_gray
+    mov r8, r1
+    lsls r5, #2
+    adds r4, r2, r5
+    // ip is actual end of output
+    mov ip, r4
+    lsls r5, #28
+    lsrs r5, #28
+    adds r4, r2, r5
+    // r10 is end of fractional part (may be == r2)
+    mov r10, r4
+    movs r7, #0x44
+    muls r6, r6, r7
+    ldr r1, =(tmds_table_4bpp)
+    ldr r4, [r1, r6]
+    adds r7, r2, r3 // beginning of green row
+    cmp r2, r10
+    beq 2f
+1:
+    stmia r2!, {r4}
+    cmp r2, r10
+    bne 1b
+    cmp r2, ip
+    beq 4f
+2:
+    mov r5, r4
+3:
+    stmia r2!, {r4, r5}
+    stmia r2!, {r4, r5}
+    cmp r2, ip
+    bne 3b
+4:
+
+    add ip, r3
+    add r10, r3
+    adds r1, r7, r3 // beginning of red row
+    cmp r7, r10
+    beq 2f
+1:
+    stmia r7!, {r4}
+    cmp r7, r10
+    bne 1b
+    cmp r7, ip
+    beq 4f
+2:
+3:
+    stmia r7!, {r4, r5}
+    stmia r7!, {r4, r5}
+    cmp r7, ip
+    bne 3b
+4:
+    // write red
+    add ip, r3
+    add r10, r3
+    cmp r1, r10
+    beq 2f
+1:
+    stmia r1!, {r4}
+    cmp r1, r10
+    bne 1b
+    cmp r1, ip
+    beq 4f
+2:
+3:
+    stmia r1!, {r4, r5}
+    stmia r1!, {r4, r5}
+    cmp r1, ip
+    bne 3b
+4:
+    mov r1, r8
+    ldmia r0!, {r4, r5, r6}
+    bx r4
+
+// args: count tmds_blue tmds_green tmds_red
+// Not sure we'll keep this.
+decl_func tmds_scan_solid_tmds
+    mov r8, r1
+    lsls r5, #2
+    adds r4, r2, r5
+    // ip is actual end of output
+    mov ip, r4
+    lsls r5, #28
+    lsrs r5, #28
+    adds r4, r2, r5
+    // r10 is end of fractional part (may be == r2)
+    mov r10, r4
+    adds r7, r2, r3 // beginning of green row
+    cmp r2, r10
+    beq 2f
+1:
+    stmia r2!, {r6}
+    cmp r2, r10
+    bne 1b
+    cmp r2, ip
+    beq 4f
+2:
+    mov r5, r6
+3:
+    stmia r2!, {r5, r6}
+    stmia r2!, {r5, r6}
+    cmp r2, ip
+    bne 3b
+4:
+
+    add ip, r3
+    add r10, r3
+    ldmia r0!, {r4, r6} // r4 = green TMDS word, r6 = red TMDS word
+    adds r1, r7, r3 // beginning of red row
+    cmp r7, r10
+    beq 2f
+1:
+    stmia r7!, {r4}
+    cmp r7, r10
+    bne 1b
+    cmp r7, ip
+    beq 4f
+2:
+    mov r5, r4
+3:
+    stmia r7!, {r4, r5}
+    stmia r7!, {r4, r5}
+    cmp r7, ip
+    bne 3b
+4:
+    // write red
+    add ip, r3
+    add r10, r3
+    cmp r1, r10
+    beq 2f
+1:
+    stmia r1!, {r6}
+    cmp r1, r10
+    bne 1b
+    cmp r1, ip
+    beq 4f
+2:
+    mov r7, r6
+3:
+    stmia r1!, {r6, r7}
+    stmia r1!, {r6, r7}
+    cmp r1, ip
+    bne 3b
+4:
+    mov r1, r8
+    ldmia r0!, {r4, r5, r6}
+    bx r4
+
+.macro tmds_scan_1bpp_pal_body shift_instr shamt
+    \shift_instr r5, r4, #\shamt
+    ands r5, r0 // r0 = mask, equals 0x30
+    add r5, r8 // r8 = pal
+    ldm r5, {r5, r6, r7}
+    str r6, [r2, r3] // r3 = stride
+    adds r6, r2, r3
+    str r7, [r6, r3]
+    stmia r2!, {r5}
+.endm
+
+1:
+    b 4f
+// args: count pal
+decl_func tmds_scan_1bpp_pal
+    lsrs r4, r5, #5
+    lsls r5, #2
+    adds r5, r2
+    mov ip, r5 // actual end of output
+    mov r8, r6
+    mov r9, r0
+    lsls r4, #7
+    beq 1b
+    adds r4, r2
+    mov r10, r4 // end of whole part
+    movs r0, #0x30
+2:
+    ldmia r1!, {r4}
+    tmds_scan_1bpp_pal_body lsls 4
+    tmds_scan_1bpp_pal_body lsls 2
+    tmds_scan_1bpp_pal_body lsls 0
+    tmds_scan_1bpp_pal_body lsrs 2
+    tmds_scan_1bpp_pal_body lsrs 4
+    tmds_scan_1bpp_pal_body lsrs 6
+    tmds_scan_1bpp_pal_body lsrs 8
+    tmds_scan_1bpp_pal_body lsrs 10
+    tmds_scan_1bpp_pal_body lsrs 12
+    tmds_scan_1bpp_pal_body lsrs 14
+    tmds_scan_1bpp_pal_body lsrs 16
+    tmds_scan_1bpp_pal_body lsrs 18
+    tmds_scan_1bpp_pal_body lsrs 20
+    tmds_scan_1bpp_pal_body lsrs 22
+    tmds_scan_1bpp_pal_body lsrs 24
+    tmds_scan_1bpp_pal_body lsrs 26
+    cmp r2, r10
+    beq 3f
+    b 2b
+3:
+    cmp r2, ip
+    beq 6f
+4:
+    ldmia r1!, {r4}
+    movs r0, #2
+5:
+    rors r4, r0
+    lsrs r5, r4, #30
+    lsls r5, #4
+    add r5, r8 // r8 = pal
+    ldm r5, {r5, r6, r7}
+    str r6, [r2, r3] // r3 = stride
+    adds r6, r2, r3
+    str r7, [r6, r3]
+    stmia r2!, {r5}
+    cmp r2, ip
+    bne 5b
+6:
+    mov r0, r9
+    ldmia r0!, {r4, r5, r6}
+    bx r4
diff --git a/software/apps/colortext/scanout.h b/software/apps/colortext/scanout.h
new file mode 100644
index 0000000..6076d3a
--- /dev/null
+++ b/software/apps/colortext/scanout.h
@@ -0,0 +1,28 @@
+// Fast scanout
+
+// Every op starts with this:
+// uint16_t count;
+// uint16_t op;
+
+// Where count is number of 2-pixel chunks
+// op = 0: stop
+// op = 1: solid color, next word is 12-bit rgb
+// op = 2: solid gray, next word is gray (0..15)
+// op = 3: 1-bit palette color, next word is palette
+
+struct scanlist {
+    uint16_t count;
+    uint16_t op;
+};
+
+void fast_scanout(void *scanlist, uint32_t *inbuf, uint32_t *outbuf, uint32_t stride);
+
+void tmds_scan_stop();
+
+void tmds_scan_solid();
+void tmds_scan_solid_gray();
+void tmds_scan_solid_tmds();
+
+void tmds_scan_1bpp_pal();
+
+void tmds_scan(uint32_t *scanlist, uint32_t *inbuf, uint32_t *outbuf, uint32_t stride);
diff --git a/software/apps/colortext/text.c b/software/apps/colortext/text.c
new file mode 100644
index 0000000..2f7e47f
--- /dev/null
+++ b/software/apps/colortext/text.c
@@ -0,0 +1,64 @@
+#include <stddef.h> // NOTE(review): header name was lost in extraction; chosen for NULL - confirm
+
+// This include is dodgy, but brings in enough of the runtime.
+#include "dvi.h" + +#include "gen.h" +#include "text.h" +#include "font.h" + +Run run_heap[8192]; +uint run_heap_ix = 0; + +void setup_text_line(struct text_line *line, const char *text, uint max_w) { + line->runs = &run_heap[run_heap_ix]; + uint x = 0; + char c; + for (uint j = 0; x < max_w && (c = text[j]) != 0; j++) { + uint glyph = c - 32; + uint width = font_widths[glyph]; + if (x + width > max_w) { + width = max_w - x; + } + uint offset = font_x_offsets[glyph]; + Run run = { + .input = font_bits + offset / 32, + .stride = FONT_STRIDE, + .bit_offset = offset & 31, + .count = width + }; + run_heap[run_heap_ix++] = run; + x += width; + } + // space pad the end; this will be better when we have + // a virtual machine + while (x & 31) { + uint glyph = 0; + uint width = font_widths[glyph]; + if ((x & 31) + width > 32) { + width = 32 - (x & 31); + } + uint offset = font_x_offsets[glyph]; + Run run = { + .input = font_bits + offset / 32, + .stride = FONT_STRIDE, + .bit_offset = offset & 31, + .count = width + }; + run_heap[run_heap_ix++] = run; + x += width; + } + line->width = x; + Run empty = { .input = NULL }; + run_heap[run_heap_ix++] = empty; +} + +#define LEADING 5 +#define LINE_SPACING (FONT_HEIGHT + LEADING) + +void render_text_scanline(uint32_t *outp, Run *runs, uint y) { + uint y_line = y % LINE_SPACING; + if (y_line < FONT_HEIGHT) { + process_scanline(runs, y_line, outp); + } +} diff --git a/software/apps/colortext/text.h b/software/apps/colortext/text.h new file mode 100644 index 0000000..6ad54e9 --- /dev/null +++ b/software/apps/colortext/text.h @@ -0,0 +1,8 @@ +struct text_line { + Run *runs; + uint width; + uint32_t palette[16]; +}; + +void setup_text_line(struct text_line *line, const char *text, uint max_w); +void render_text_scanline(uint32_t *outp, Run *runs, uint y); diff --git a/software/include/common_dvi_pin_configs.h b/software/include/common_dvi_pin_configs.h index b3893a3..ed433ed 100644 --- a/software/include/common_dvi_pin_configs.h +++ 
b/software/include/common_dvi_pin_configs.h @@ -88,4 +88,13 @@ static const struct dvi_serialiser_cfg not_hdmi_featherwing_cfg = { .invert_diffpairs = true }; +// Adafruit Feather RP2040 DVI +static const struct dvi_serialiser_cfg adafruit_feather_dvi_cfg = { + .pio = DVI_DEFAULT_PIO_INST, + .sm_tmds = {0, 1, 2}, + .pins_tmds = {18, 20, 22}, + .pins_clk = 16, + .invert_diffpairs = true +}; + #endif diff --git a/software/libdvi/tmds_encode.S b/software/libdvi/tmds_encode.S index 065061d..94d36f6 100644 --- a/software/libdvi/tmds_encode.S +++ b/software/libdvi/tmds_encode.S @@ -621,3 +621,32 @@ decl_func_x tmds_palette_encode_loop_x tmds_palette_encode_loop decl_func_y tmds_palette_encode_loop_y tmds_palette_encode_loop + +// 4-bit grayscale + +decl_func tmds_encode_loop_4bpp + push {r4, r5, r6, r7, lr} + lsls r2, #1 + add r2, r1 + mov ip, r2 + ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET) + b 2f +.align 2 +1: + ldmia r0!, {r4} + str r4, [r2, #ACCUM0_OFFS] + ldr r6, [r2, #PEEK0_OFFS] + ldr r6, [r6] + ldr r7, [r2, #PEEK1_OFFS] + ldr r7, [r7] + lsls r4, #16 + str r4, [r2, #ACCUM0_OFFS] + ldr r4, [r2, #PEEK0_OFFS] + ldr r4, [r4] + ldr r5, [r2, #PEEK1_OFFS] + ldr r5, [r5] + stmia r1!, {r4, r5, r6, r7} +2: + cmp r1, ip + bne 1b + pop {r4, r5, r6, r7, pc} diff --git a/software/libdvi/tmds_encode.c b/software/libdvi/tmds_encode.c index 472b1a9..f0b37e6 100644 --- a/software/libdvi/tmds_encode.c +++ b/software/libdvi/tmds_encode.c @@ -19,6 +19,10 @@ const uint32_t __scratch_y("tmds_table_fullres_y") tmds_table_fullres_y[] = { #include "tmds_table_fullres.h" }; +const uint32_t __scratch_x("tmds_table_4bpp") tmds_table_4bpp[] = { +#include "tmds_table_4bpp.h" +}; + // Configure an interpolator to extract a single colour channel from each of a pair // of pixels, with the first pixel's lsb at pixel_lsb, and the pixels being // pixel_width wide. 
Produce a LUT address for the first pixel's colour data on @@ -94,6 +98,24 @@ void __not_in_flash_func(tmds_encode_data_channel_8bpp)(const uint32_t *pixbuf, interp_restore(interp1_hw, &interp1_save); } +void __not_in_flash_func(tmds_encode_4bpp)(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix) { + interp_hw_save_t interp0_save; + interp_save(interp0_hw, &interp0_save); + interp_config c; + c = interp_default_config(); + interp_config_set_shift(&c, 14); + interp_config_set_mask(&c, 2, 9); + interp_set_config(interp0_hw, 0, &c); + interp_config_set_shift(&c, 22); + interp_config_set_cross_input(&c, true); + interp_set_config(interp0_hw, 1, &c); + interp0_hw->base[0] = (uint32_t)tmds_table_4bpp; + interp0_hw->base[1] = (uint32_t)tmds_table_4bpp; + + tmds_encode_loop_4bpp(pixbuf, symbuf, n_pix); + interp_restore(interp0_hw, &interp0_save); +} + // ---------------------------------------------------------------------------- // Code for full-resolution TMDS encode (barely possible, utterly impractical): diff --git a/software/libdvi/tmds_encode.h b/software/libdvi/tmds_encode.h index ee8e244..e03679d 100644 --- a/software/libdvi/tmds_encode.h +++ b/software/libdvi/tmds_encode.h @@ -7,6 +7,7 @@ // Functions from tmds_encode.c void tmds_encode_data_channel_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb); void tmds_encode_data_channel_8bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb); +void tmds_encode_4bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); void tmds_encode_data_channel_fullres_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb); void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *symbuf, size_t n_palette); void tmds_setup_palette24_symbols(const uint32_t *palette, uint32_t *symbuf, size_t n_palette); @@ -16,6 +17,7 @@ void tmds_encode_palette_data(const uint32_t *pixbuf, const uint32_t 
*tmds_palet void tmds_encode_1bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); void tmds_encode_2bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); +void tmds_encode_loop_4bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); // Uses interp0: void tmds_encode_loop_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); @@ -34,4 +36,6 @@ void tmds_fullres_encode_loop_16bpp_leftshift_y(const uint32_t *pixbuf, uint32_t void tmds_palette_encode_loop_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); void tmds_palette_encode_loop_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix); +extern const uint32_t tmds_table_4bpp[]; + #endif diff --git a/software/libdvi/tmds_table_4bpp.h b/software/libdvi/tmds_table_4bpp.h new file mode 100644 index 0000000..651d715 --- /dev/null +++ b/software/libdvi/tmds_table_4bpp.h @@ -0,0 +1,257 @@ +// This was auto-generated by garden/tmds.py +0x7fd00, // 0, 1 (+0, +1) +0xc05fa, // e, 2 (-3, +2) +0x7f2a0, // 1f, 4 (-3, +4) +0xc01ef, // 31, 1 (-2, +1) +0xc053f, // 41, 2 (-3, +2) +0xc0dce, // 52, 4 (-3, +4) +0xc0677, // 66, 2 (+0, +2) +0xc067d, // 78, 2 (+1, +2) +0xc057d, // 87, 2 (-1, +2) +0xc0577, // 99, 2 (+0, +2) +0xc0e37, // a6, 4 (-4, +4) +0xc0e3d, // b8, 4 (-3, +4) +0xc02ef, // ce, 1 (+2, +1) +0xc0ee3, // da, 4 (-3, +4) +0x7f20c, // eb, 4 (-3, +4) +0x7fe00, // ff, 1 (+0, +1) +0xfe901, // 3, f (+3, -2) +0x43d0f, // 11, 11 (+0, +0) +0x43d1e, // 22, 11 (+0, +0) +0xfc511, // 33, 12 (+0, +1) +0x43d3c, // 44, 11 (+0, +0) +0x43d33, // 55, 11 (+0, +0) +0x7c1d8, // 68, 10 (+2, -1) +0x43e78, // 77, 11 (+0, +0) +0x43d78, // 88, 11 (+0, +0) +0xfc588, // 98, 12 (-1, +1) +0x43e33, // aa, 11 (+0, +0) +0x43e3c, // bb, 11 (+0, +0) +0xfc611, // cc, 12 (+0, +1) +0x43e1e, // dd, 11 (+0, +0) +0x43e0f, // ee, 11 (+0, +0) +0xfce01, // fc, 14 (-3, +3) +0xa81fc, // 4, 1f (+4, -3) +0x4790f, // 11, 22 (+0, +0) +0x4791e, // 22, 22 (+0, +0) +0x47d13, // 35, 21 (+2, -1) +0x4793c, // 44, 22 (+0, +0) +0x47933, // 55, 
22 (+0, +0) +0x471dc, // 64, 24 (-2, +2) +0x47a78, // 77, 22 (+0, +0) +0x47978, // 88, 22 (+0, +0) +0x47e23, // 9a, 21 (+1, -1) +0x47a33, // aa, 22 (+0, +0) +0x47a3c, // bb, 22 (+0, +0) +0x472ec, // cb, 24 (-1, +2) +0x47a1e, // dd, 22 (+0, +0) +0x47a0f, // ee, 22 (+0, +0) +0xf8e03, // fa, 24 (-5, +2) +0x7bd01, // 3, 31 (+3, -2) +0xfb105, // f, 35 (-2, +2) +0x7b1e0, // 20, 34 (-2, +1) +0x7b911, // 33, 32 (+0, -1) +0x7b1c1, // 43, 34 (-1, +1) +0xc4667, // 56, 32 (+1, -1) +0x44677, // 66, 33 (+0, +0) +0x4467d, // 78, 33 (+1, +0) +0x4457d, // 87, 33 (-1, +0) +0x44577, // 99, 33 (+0, +0) +0x7b198, // a8, 34 (-2, +1) +0xc463e, // bd, 32 (+2, -1) +0x7ba11, // cc, 32 (+0, -1) +0xc461f, // de, 32 (+1, -1) +0x7b20e, // ed, 34 (-1, +1) +0xfba01, // fc, 33 (-3, +0) +0x701fc, // 4, 40 (+4, -4) +0x4f10f, // 11, 44 (+0, +0) +0x4f11e, // 22, 44 (+0, +0) +0xcf911, // 33, 43 (+0, -1) +0x4f13c, // 44, 44 (+0, +0) +0x4f133, // 55, 44 (+0, +0) +0xcfa88, // 67, 43 (+1, -1) +0x4f278, // 77, 44 (+0, +0) +0x4f178, // 88, 44 (+0, +0) +0x706dc, // 9b, 43 (+2, -1) +0x4f233, // aa, 44 (+0, +0) +0x4f23c, // bb, 44 (+0, +0) +0xcfa11, // cc, 43 (+0, -1) +0x4f21e, // dd, 44 (+0, +0) +0x4f20f, // ee, 44 (+0, +0) +0xf1e03, // fa, 48 (-5, +4) +0x73d03, // 5, 51 (+5, -4) +0x4cd0f, // 11, 55 (+0, +0) +0x4cd1e, // 22, 55 (+0, +0) +0x261ee, // 32, 56 (-1, +1) +0x4cd3c, // 44, 55 (+0, +0) +0x4cd33, // 55, 55 (+0, +0) +0x26277, // 66, 56 (+0, +1) +0x4ce78, // 77, 55 (+0, +0) +0x4cd78, // 88, 55 (+0, +0) +0x26177, // 99, 56 (+0, +1) +0x4ce33, // aa, 55 (+0, +0) +0x4ce3c, // bb, 55 (+0, +0) +0x99e13, // ca, 56 (-2, +1) +0x4ce1e, // dd, 55 (+0, +0) +0x4ce0f, // ee, 55 (+0, +0) +0x262fc, // fb, 56 (-4, +1) +0x221fe, // 2, 66 (+2, +0) +0x48df1, // 13, 65 (+2, -1) +0x7711c, // 24, 64 (+2, -2) +0x9dd11, // 33, 66 (+0, +0) +0x1ddc1, // 43, 67 (-1, +1) +0x48e67, // 56, 65 (+1, -1) +0xa2277, // 66, 67 (+0, +1) +0xf7282, // 79, 65 (+2, -1) +0xa217d, // 87, 67 (-1, +1) +0xa2177, // 99, 67 (+0, +1) +0x77231, // ac, 64 
(+2, -2) +0x48e3e, // bd, 65 (+2, -1) +0x9de11, // cc, 66 (+0, +0) +0x48e1f, // de, 65 (+1, -1) +0x1de0e, // ed, 67 (-1, +1) +0x222fe, // fd, 66 (-2, +0) +0x209fe, // 2, 78 (+2, +1) +0x9e10f, // 11, 77 (+0, +0) +0x9e11e, // 22, 77 (+0, +0) +0x9f511, // 33, 78 (+0, +1) +0x9e13c, // 44, 77 (+0, +0) +0x9e133, // 55, 77 (+0, +0) +0xa1dd8, // 68, 76 (+2, -1) +0x9e278, // 77, 77 (+0, +0) +0x9e178, // 88, 77 (+0, +0) +0x9f588, // 98, 78 (-1, +1) +0x9e233, // aa, 77 (+0, +0) +0x9e23c, // bb, 77 (+0, +0) +0x9f611, // cc, 78 (+0, +1) +0x9e21e, // dd, 77 (+0, +0) +0x9e20f, // ee, 77 (+0, +0) +0x20afe, // fd, 78 (-2, +1) +0xdf501, // 3, 86 (+3, -2) +0x5e10f, // 11, 88 (+0, +0) +0x5e11e, // 22, 88 (+0, +0) +0x5f511, // 33, 87 (+0, -1) +0x5e13c, // 44, 88 (+0, +0) +0x5e133, // 55, 88 (+0, +0) +0x5f688, // 67, 87 (+1, -1) +0x5e278, // 77, 88 (+0, +0) +0x5e178, // 88, 88 (+0, +0) +0xe0adc, // 9b, 87 (+2, -1) +0x5e233, // aa, 88 (+0, +0) +0x5e23c, // bb, 88 (+0, +0) +0x5f611, // cc, 87 (+0, -1) +0x5e21e, // dd, 88 (+0, +0) +0x5e20f, // ee, 88 (+0, +0) +0x612fc, // fb, 8c (-4, +4) +0xddd01, // 3, 98 (+3, -1) +0xe21f1, // 13, 99 (+2, +0) +0xe211f, // 21, 99 (-1, +0) +0x5dd11, // 33, 99 (+0, +0) +0x88d3e, // 42, 9a (-2, +1) +0xe2267, // 56, 99 (+1, +0) +0x62277, // 66, 98 (+0, -1) +0x6227d, // 78, 98 (+1, -1) +0xb6187, // 89, 97 (+1, -2) +0x62177, // 99, 98 (+0, -1) +0xe2167, // a9, 99 (-1, +0) +0xb72c2, // b9, 9b (-2, +2) +0x5de11, // cc, 99 (+0, +0) +0xe221f, // de, 99 (+1, +0) +0x88ef1, // ec, 9a (-2, +1) +0xdde01, // fc, 98 (-3, -1) +0x321fc, // 4, a6 (+4, -4) +0x8cd0f, // 11, aa (+0, +0) +0x8cd1e, // 22, aa (+0, +0) +0x59d13, // 35, a9 (+2, -1) +0x8cd3c, // 44, aa (+0, +0) +0x8cd33, // 55, aa (+0, +0) +0x8c5dc, // 64, ac (-2, +2) +0x8ce78, // 77, aa (+0, +0) +0x8cd78, // 88, aa (+0, +0) +0x59e23, // 9a, a9 (+1, -1) +0x8ce33, // aa, aa (+0, +0) +0x8ce3c, // bb, aa (+0, +0) +0x8c6ec, // cb, ac (-1, +2) +0x8ce1e, // dd, aa (+0, +0) +0x8ce0f, // ee, aa (+0, +0) +0x0c6fc, // fb, ad 
(-4, +3) +0x309fc, // 4, b8 (+4, -3) +0x8f10f, // 11, bb (+0, +0) +0x8f11e, // 22, bb (+0, +0) +0xb09ec, // 34, b9 (+1, -2) +0x8f13c, // 44, bb (+0, +0) +0x8f133, // 55, bb (+0, +0) +0xb0dd8, // 68, ba (+2, -1) +0x8f278, // 77, bb (+0, +0) +0x8f178, // 88, bb (+0, +0) +0xb0adc, // 9b, b9 (+2, -2) +0x8f233, // aa, bb (+0, +0) +0x8f23c, // bb, bb (+0, +0) +0xb06ec, // cb, bc (-1, +1) +0x8f21e, // dd, bb (+0, +0) +0x8f20f, // ee, bb (+0, +0) +0x302fe, // fd, be (-2, +3) +0x045fe, // 2, cd (+2, +1) +0x3b90e, // 12, cc (+1, +0) +0xbb11c, // 24, cb (+2, -1) +0xbb911, // 33, cd (+0, +1) +0x3b9c1, // 43, cc (-1, +0) +0xbb298, // 57, cb (+2, -1) +0x84677, // 66, cc (+0, +0) +0x8467d, // 78, cc (+1, +0) +0x8457d, // 87, cc (-1, +0) +0x84577, // 99, cc (+0, +0) +0xbb231, // ac, cb (+2, -1) +0x3bac1, // bc, cc (+1, +0) +0xbba11, // cc, cd (+0, +1) +0xbb2e0, // df, cb (+2, -1) +0x3ba0e, // ed, cc (-1, +0) +0x046fe, // fd, cd (-2, +1) +0x071fc, // 4, da (+4, -3) +0x8790f, // 11, dd (+0, +0) +0x8791e, // 22, dd (+0, +0) +0x381ee, // 32, de (-1, +1) +0x8793c, // 44, dd (+0, +0) +0x87933, // 55, dd (+0, +0) +0x38277, // 66, de (+0, +1) +0x87a78, // 77, dd (+0, +0) +0x87978, // 88, dd (+0, +0) +0x38177, // 99, de (+0, +1) +0x87a33, // aa, dd (+0, +0) +0x87a3c, // bb, dd (+0, +0) +0x87e13, // ca, de (-2, +1) +0x87a1e, // dd, dd (+0, +0) +0x87a0f, // ee, dd (+0, +0) +0xd7e01, // fc, e0 (-3, +3) +0x831fc, // 4, eb (+4, -3) +0x83d0f, // 11, ee (+0, +0) +0x83d1e, // 22, ee (+0, +0) +0x839ec, // 34, ed (+1, -1) +0x83d3c, // 44, ee (+0, +0) +0x83d33, // 55, ee (+0, +0) +0x3e923, // 65, f0 (-1, +2) +0x83e78, // 77, ee (+0, +0) +0x83d78, // 88, ee (+0, +0) +0x83adc, // 9b, ed (+2, -1) +0x83e33, // aa, ee (+0, +0) +0x83e3c, // bb, ee (+0, +0) +0x3ea13, // ca, f0 (-2, +2) +0x83e1e, // dd, ee (+0, +0) +0x83e0f, // ee, ee (+0, +0) +0x016fe, // fd, f1 (-2, +3) +0xbfd00, // 0, fe (+0, -1) +0x3f90c, // 14, fc (+3, -3) +0x3f11c, // 24, fa (+2, -5) +0x3fd10, // 30, ff (-3, +0) +0x3f138, // 48, fa 
(+4, -5) +0x3f1c8, // 58, fa (+3, -5) +0x3fa88, // 67, fc (+1, -3) +0x3fa82, // 79, fc (+2, -3) +0xbf184, // 8c, fb (+4, -4) +0x3fa21, // 9c, fc (+3, -3) +0xbf230, // af, fb (+5, -4) +0xbf2c0, // bf, fb (+4, -4) +0xbfa10, // cf, fd (+3, -2) +0x3f9a0, // e0, fc (+3, -3) +0x3fa05, // f0, fc (+2, -3) +0xbfe00, // ff, fe (+0, -1) diff --git a/software/scripts/gen_4bpp_lut.py b/software/scripts/gen_4bpp_lut.py new file mode 100644 index 0000000..faa4e7e --- /dev/null +++ b/software/scripts/gen_4bpp_lut.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +# A script to generate TMDS lookup tables for pairs of 4bpp pixels. +# +# All TMDS symbol pairs generated are DC balanced. The script uses a +# search to find the DC balanced symbol pair with the least error +# compared with the desired pixels. + +def popcount(x): + n = 0 + while x: + n += 1 + x = x & (x - 1) + return n + +# Equivalent to N1(q) - N0(q) in the DVI spec +def byteimbalance(x): + return 2 * popcount(x) - 8 + +class TMDSEncode: + ctrl_syms = { + 0b00: 0b1101010100, + 0b01: 0b0010101011, + 0b10: 0b0101010100, + 0b11: 0b1010101011 + } + def __init__(self): + self.imbalance = 0 + + def encode(self, d, c, de): + if not de: + self.imbalance = 0 + return self.ctrl_syms[c] + # Minimise transitions + q_m = d & 0x1 + if popcount(d) > 4 or (popcount(d) == 4 and not d & 0x1): + for i in range(7): + q_m = q_m | (~(q_m >> i ^ d >> i + 1) & 0x1) << i + 1 + else: + for i in range(7): + q_m = q_m | ( (q_m >> i ^ d >> i + 1) & 0x1) << i + 1 + q_m = q_m | 0x100 + # Correct DC balance + inversion_mask = 0x2ff + q_out = 0 + if self.imbalance == 0 or byteimbalance(q_m & 0xff) == 0: + q_out = q_m ^ (0 if q_m & 0x100 else inversion_mask) + if q_m & 0x100: + self.imbalance += byteimbalance(q_m & 0xff) + else: + self.imbalance -= byteimbalance(q_m & 0xff) + elif (self.imbalance > 0) == (byteimbalance(q_m & 0xff) > 0): + q_out = q_m ^ inversion_mask + self.imbalance += ((q_m & 0x100) >> 7) - byteimbalance(q_m & 0xff) + else: + q_out = q_m 
+ self.imbalance += byteimbalance(q_m & 0xff) - ((~q_m & 0x100) >> 7) + return q_out + +def gen_4bpp_lut(): + e = TMDSEncode() + for i in range(256): + g0 = (i % 16) * 17 + g1 = (i // 16) * 17 + dithpats = ((0, 0), (0, 1), (0, -1), (1, 0), (-1, 0), (1, -1), (-1, 1), + (2, -2), (-2, 2), (2, -1), (-2, 1), (1, -2), (-1, 2), + (2, 0), (-2, 0), (0, 2), (0, -2), + (2, 1), (-2, -1), (1, 2), (-1, -2), (2, 2), (-2, -2), + (3, -3), (-3, 3), (3, -2), (-3, 2), (2, -3), (-2, 3), + (3, -1), (-3, 1), (1, -3), (-1, 3), + (3, 0), (-3, 0), (0, 3), (0, -3), + (3, 1), (-3, -1), (-1, -3), (1, 3), + #(3, 2), (-3, -2), (-2, -3), (2, 3), (3, 3), (-3, -3), + (4, -4), (-4, 4), (4, -3), (-4, 3), (3, -4), (-3, 4), + (4, -2), (-4, 2), (2, -4), (-4, 2), + (4, -1), (-4, 1), (1, -4), (-1, 4), + (4, 0), (-4, 0), (0, 4), (0, -4), + #(4, 1), (-4, -1), (-1, -4), (1, 4), + #(4, 2), (-4, -2), (-2, -4), (2, 4), + #(4, 3), (-4, -3), (-3, -4), (3, 4), (4, 4), (-4, -4), + (5, -5), (-5, 5), (5, -4), (-5, 4), (4, -5), (-4, 5), + (5, -3), (-5, 3), (3, -5), (-5, 3), + (5, -2), (-5, 2), (2, -5), (-5, 2), + ) + found = False + for a, b in dithpats: + h0 = g0 + a + h1 = g1 + b + if h0 < 0 or h0 > 255 or h1 < 0 or h1 > 255: + continue + e.imbalance = 0 + t0 = e.encode(h0, 0, True) + t1 = e.encode(h1, 0, True) + if e.imbalance == 0: + word = (t1 << 10) | t0 + #print(f'\t.word 0x{word:05x} // {h0:2x}, {h1:2x} ({a:+}, {b:+})') + print(f'0x{word:05x}, // {h0:2x}, {h1:2x} ({a:+}, {b:+})') + found = True + break + if not found: + print(f'error {g0:2x} {g1:2x}') + +gen_4bpp_lut() diff --git a/software/scripts/repack_pcf.py b/software/scripts/repack_pcf.py new file mode 100644 index 0000000..f855a62 --- /dev/null +++ b/software/scripts/repack_pcf.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +# A script to repack a PCF font file to one that can be used by the +# colortext program. +# +# A good source of suitable PCF files is the X11 distribution. +# +# This script has quite a number of limitations. 
It hardcodes some of +# the encoding, and is limited to ASCII. It assumes a number of +# settings (word size etc) that are true for the fonts tested, but not +# of PCF fonts in general. It clips glyphs with negative side bearings +# (typically the descender of the lowercase j). +# +# Input PCF filename is given as argument, C code on stdout. + +import sys +import struct + +class Pcf: + def __init__(self, data): + self.data = data + n_tables = geti32(data, 4) + tables = {} + for i in range(n_tables): + table_type = geti32(data, 16 * i + 8) + table_len = geti32(data, 16 * i + 16) + table_offset = geti32(data, 16 * i + 20) + table_data = data[table_offset : table_offset + table_len] + tables[table_type] = table_data + self.tables = tables + self.metrics = self.dometrics() + self.bitmaps = self.dobitmap() + + def dometrics(self): + data = self.tables[4] + format = geti32(data, 0) + metrics_count = geti16(data, 4) + metrics = [] + for i in range(metrics_count): + lsb = getu8(data, 5 * i + 6) - 128 + rsb = getu8(data, 5 * i + 7) - 128 + width = getu8(data, 5 * i + 8) - 128 + asc = getu8(data, 5 * i + 9) - 128 + desc = getu8(data, 5 * i + 10) - 128 + metrics.append((lsb, rsb, width, asc, desc)) + return metrics + + def dobitmap(self): + data = self.tables[8] + format = geti32(data, 0) + glyph_count = geti32(data, 4) + glyphdata = data[24 + 4 * glyph_count:] + bitmaps = [] + for i in range(glyph_count): + offset = geti32(data, i * 4 + 8) + metrics = self.metrics[i] + #print(i, hex(i + 32), offset, metrics) + height = metrics[3] + metrics[4] + # most likely stride is width rounded up to 1 << format + stride = 4 + bitmap = [getu32(glyphdata, offset + stride * j) for j in range(height)] + if False: + for word in bitmap: + s = '' + while word: + if word % 2: + s += '##' + else: + s += ' ' + word = word >> 1 + print(s) + bitmaps.append(bitmap) + return bitmaps + + def render(self): + # Only worry about ASCII printable for now + n = 95 + asc = max(m[3] for m in
self.metrics[:n]) + desc = max(m[4] for m in self.metrics[:n]) + height = asc + desc + x = 0 + pos = [] + for m in self.metrics[:n]: + pos.append(x) + x += m[2] + width = x + (31 & -x) + bits = [[0] * width for i in range(height)] + for j in range(n): + m = self.metrics[j] + y = asc - m[3] + w = m[2] + for word in self.bitmaps[j]: + x = m[0] + while word: + if word % 2 and x >= 0 and x < w and y >= 0 and y < height: + bits[y][x + pos[j]] = 1 + word = word >> 1 + x += 1 + y += 1 + stride = width // 8 + print(f'#define FONT_STRIDE {stride}') + print(f'#define FONT_HEIGHT {height}') + print_c_array('font_x_offsets', pos, 'uint16_t') + widths = [m[2] for m in self.metrics[:n]] + print_c_array('font_widths', widths, 'uint8_t') + words = [] + for row in bits: + for x in range(0, width, 32): + word = 0 + for j in range(32): + word |= row[x + j] << j + words.append(word) + print_c_array('font_bits', words, 'uint32_t', True) + +def print_c_array(name, data, ty, is_hex = False): + print(f'static const {ty} {name}[] = {{') + buf = '' + for x in data: + s = hex(x) if is_hex else str(x) + if len(buf) + len(s) <= 72: + if buf == '': + buf = ' ' + s + else: + buf += ', ' + s + else: + print(buf + ',') + buf = ' ' + s + print(buf) + print('};') + + +def geti32(data, offset): + return struct.unpack('