/*  Sjaak, a program for playing chess variants
 *  Copyright (C) 2011  Evert Glebbeek
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <stdbool.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <ctype.h>
#include "bitboard.h"
#include "board.h"
#include "square.h"
#include "bits.h"
#include "movegen.h"
#include "moveflags.h"
#include "move.h"

/*****************************************
 * Data structures for normal 8x8 board. *
 *****************************************/
/* Leaper tables. Each leaper type occupies one slot; leaper_description holds its step packed as
 * n | (m<<4), i.e. n in the low nibble and m in the high nibble. The value 0xff is stored for slots
 * that must never be matched when looking for an existing leaper (compound leapers).
 * leapers[type][square] is the set of destination squares; reverse_leapers holds the mirrored
 * (other side's) pattern for asymmetric leapers.
 */
uint8_t leaper_description[MAX_LEAPER_TYPES]; // (n,m) encoded in two nibbles
bitboard_t leapers[MAX_LEAPER_TYPES][64];
bitboard_t reverse_leapers[MAX_LEAPER_TYPES][64];
int number_of_leapers = 0;

/* Stepper tables. The counter starts at 1 and reset_piece_definitions() restores it to 1.
 * NOTE(review): slot 0 is presumably reserved to mean "not a stepper" - confirm against the users of
 * stepper_description.
 */
uint32_t stepper_description[MAX_STEPPER_TYPES]; // 8 directions, with repeat counts (0-7) for each->24 bits
bitboard_t stepper_step[MAX_STEPPER_TYPES][64];  // Single step bitmasks
int number_of_steppers = 1;

/* Bitshifts for all directions: N   NE  E   SE    S   SW    W   NW */
int step_shift[8] =           {  8,  9,  1,  -7,  -8,  -9,  -1,  7 };

/* Slider and hopper moves, for one rank/file (need to shift).
 * All tables are indexed [position of the piece on the line][occupancy of the line]. The h* tables are
 * built on squares 0..7 and the v* tables on squares 0, 8, ..., 56, so the result has to be shifted to
 * the rank/file the piece is actually on. After initialise_slider_tables() the move tables also
 * contain the corresponding capture squares.
 */
bitboard_t hmslider[8][256];    // Only need 6 bits of occ, but keep code simple and general
bitboard_t vmslider[8][256];    // Only need 6 bits of occ, but keep code simple and general
bitboard_t hmhopper[8][256];    // Need full 8 bits of occ
bitboard_t vmhopper[8][256];    // Need full 8 bits of occ

/* Slider and hopper captures: only the square of the blocker that can be captured (the first blocker
 * for a slider, the second one for a hopper).
 */
bitboard_t hcslider[8][256];    // Only need 6 bits of occ, but keep code simple and general
bitboard_t vcslider[8][256];    // Only need 6 bits of occ, but keep code simple and general
bitboard_t hchopper[8][256];    // Need full 8 bits of occ
bitboard_t vchopper[8][256];    // Need full 8 bits of occ

/* Attack bitboard for a "super piece" on an empty board. A superpiece can move as any piece in the game. This
 * is useful when testing for checks or attacks to specific squares: we know beforehand that we only need to
 * consider pieces on squares that can be reached by a superpiece on the target square.
 * initialise_slider_tables() sets every entry to the full board.
 */
bitboard_t super[64];

/* The ray connecting the two given squares, including both end points. Entries for squares that do not
 * share a rank, file or diagonal are left empty.
 */
bitboard_t connecting_ray[64][64];

/* Squares taken into account when computing king safety: the square itself and its neighbours */
bitboard_t king_zone[64];

/*****************************************
 * Data structures for large board.      *
 *****************************************/
/* Large-board counterparts of leapers, reverse_leapers and stepper_step. They share the slot numbering
 * (number_of_leapers / number_of_steppers) with the 8x8 tables.
 */
large_bitboard_t large_leapers[MAX_LEAPER_TYPES][128];
large_bitboard_t large_reverse_leapers[MAX_LEAPER_TYPES][128];
large_bitboard_t large_stepper_step[MAX_STEPPER_TYPES][128];  // Single step bitmasks

/* Bitshifts for all directions: N   NE  E   SE    S   SW    W   NW */
/* Filled in by initialise_large_slider_tables() because the values depend on the board width. */
int large_step_shift[8];

/* Slider and hopper moves, for one rank/file (need to shift). There are up to 16 ranks/files, up to 2^16
 * occupancy states, but it is wasteful to reserve enough space for all these possibilities, so we make do
 * with considerably less and allocate memory dynamically.
 * This should still be accessed [file/rank][occupancy pattern]
 * Each pointer refers to a single allocation that holds the row-pointer array followed by the rows.
 */
large_bitboard_t **large_hmslider = NULL;
large_bitboard_t **large_vmslider = NULL;
large_bitboard_t **large_hmhopper = NULL;
large_bitboard_t **large_vmhopper = NULL;

/* Slider and hopper captures */
large_bitboard_t **large_hcslider = NULL;
large_bitboard_t **large_vcslider = NULL;
large_bitboard_t **large_hchopper = NULL;
large_bitboard_t **large_vchopper = NULL;

/* Attack bitboard for a "super piece" on an empty board. A superpiece can move as any piece in the game. This
 * is useful when testing for checks or attacks to specific squares: we know beforehand that we only need to
 * consider pieces on squares that can be reached by a superpiece on the target square.
 */
large_bitboard_t large_super[128];

/* The ray connecting the two given squares */
large_bitboard_t large_connecting_ray[128][128];

/* Masks for stepper move generation, one per direction (same order as large_step_shift) */
large_bitboard_t large_step_mask[8];

/* Set by initialise_large_slider_tables(). When true, the leaper definition code fills the large_*
 * tables in addition to the 8x8 ones.
 */
static bool use_large_tables = false;


/* Smaller of two integers. */
static int min(int x, int y)
{
   if (y <= x)
      return y;
   return x;
}
/* Larger of two integers. */
static int max(int x, int y)
{
   if (y >= x)
      return y;
   return x;
}


/* Forget all leaper and stepper definitions.
 * The leaper count returns to 0 and the stepper count to 1, the values these counters are initialised
 * with. The table contents themselves are left untouched.
 */
void reset_piece_definitions(void)
{
   number_of_steppers = 1;
   number_of_leapers  = 0;
}


void initialise_slider_tables(void)
{
   int file, rank, square;
   int occ_mask, occ;
   int n;

   memset(&hmslider, 0, sizeof hmslider);
   memset(&vmslider, 0, sizeof vmslider);
   memset(&hmhopper, 0, sizeof hmhopper);
   memset(&vmhopper, 0, sizeof vmhopper);

   memset(&hcslider, 0, sizeof hcslider);
   memset(&vcslider, 0, sizeof vcslider);
   memset(&hchopper, 0, sizeof hchopper);
   memset(&vchopper, 0, sizeof vchopper);

   for (file = 0; file<8; file++) {
      for (occ = 0; occ < 256; occ++) {
         hmslider[file][occ] = 0;
         hcslider[file][occ] = 0;

         /* Left of slider position, rook and cannon moves, rook
          * attacks.
          */
         for (n=file-1; n>=0; n--) {
            if ( occ & (1 << n) ) {
               hcslider[file][occ] |= make_bitboard_square(n);
               break;
            }

            hmslider[file][occ] |= make_bitboard_square(n);
         }
         n--;
         /* Cannon attacks */
         for (; n>=0; n--) {
            if ( occ & (1 << n) ) {
               hchopper[file][occ] |= make_bitboard_square(n);
               break;
            }
            hmhopper[file][occ] |= make_bitboard_square(n);
         }

         /* Right of slider position */
         for (n=file+1; n<8; n++) {
            if ( occ & (1 << n) ) {
               hcslider[file][occ] |= make_bitboard_square(n);
               break;
            }

            hmslider[file][occ] |= make_bitboard_square(n);
         }
         n++;
         /* Cannon attacks */
         for (; n<8; n++) {
            if ( occ & (1 << n) ) {
               hchopper[file][occ] |= make_bitboard_square(n);
               break;
            }
            hmhopper[file][occ] |= make_bitboard_square(n);
         }
      }
   }

   /* File attacks
    * We can actually use a trick to get these from rank attacks with the same occupancy number as well.
    * For the A-file:
    *  file_attacks[occ] = ((rank_attacks[occ] & B1C1D1E1F1G1H1) * H1-A8 << 7 | (rank_attacks[occ]&A1)) & afile
    */
   for (rank = 0; rank < 8; rank++) {
      for (occ = 0; occ < 256; occ++) {
         vmslider[rank][occ] = 0;
         vcslider[rank][occ] = 0;

         /* South of slider position, rook and cannon moves, rook
          * attacks.
          */
         for (n=rank-1; n>=0; n--) {
            if ( occ & (1 << n) ) {
               vcslider[rank][occ] |= make_bitboard_square(8*n);
               break;
            }

            vmslider[rank][occ] |= make_bitboard_square(8*n);
         }
         n--;
         /* Cannon attacks */
         for (; n>=0; n--) {
            if ( occ & (1 << n) ) {
               vchopper[rank][occ] |= make_bitboard_square(8*n);
               break;
            }
            vmhopper[rank][occ] |= make_bitboard_square(8*n);
         }

         /* North of slider position */
         for (n=rank+1; n<8; n++) {
            if ( occ & (1 << n) ) {
               vcslider[rank][occ] |= make_bitboard_square(8*n);
               break;
            }

            vmslider[rank][occ] |= make_bitboard_square(8*n);
         }
         n++;
         /* Cannon attacks */
         for (; n<8; n++) {
            if ( occ & (1 << n) ) {
               vchopper[rank][occ] |= make_bitboard_square(8*n);
               break;
            }
            vmhopper[rank][occ] |= make_bitboard_square(8*n);
         }
      }
   }

   /* Include the sentinel square (the capture location) in the move destinations. This will be masked by the
    * move generator anyway and it will allow us a small optimisation by skipping a separate capture
    * generation for pieces that capture as they move (which is normal).
    */
   for (file = 0; file<8; file++) {
      for (occ = 0; occ < 1<<8; occ++) {
         hmslider[file][occ] |= hcslider[file][occ];
         hmhopper[file][occ] |= hchopper[file][occ];
      }
   }
   for (rank = 0; rank < 8; rank++) {
      for (occ = 0; occ < 1<<8; occ++) {
         vmslider[rank][occ] |= vcslider[rank][occ];
         vmhopper[rank][occ] |= vchopper[rank][occ];
      }
   }

   /* Initialise superpiece attacks to a full board */
   for (n=0; n<64; n++)
      super[n] = board_all;

   /* Connecting rays */
   memset(connecting_ray, 0, sizeof connecting_ray);
   for (square = A1; square <= H8; square++) {
      int attack;
      for (attack = square+1; attack<=H8; attack++) {
         int row = unpack_rank(square);
         int file = unpack_file(square);
         if (row == unpack_rank(attack)) {
            int n;
            for (n=file;n<=unpack_file(attack);n++)
               connecting_ray[square][attack] |=
                  make_bitboard_square(pack_row_file(row, n));
         }
         if (file == unpack_file(attack)) {
            int n;
            for (n=row;n<=unpack_rank(attack);n++)
               connecting_ray[square][attack] |=
                  make_bitboard_square(pack_row_file(n, file));
         }
         if (get_a1h8_diagonal(square) == get_a1h8_diagonal(attack)) {
            int n;
            for (n=square;n<=(attack);n+=9)
               connecting_ray[square][attack] |= make_bitboard_square(n);
         }
         if (get_a8h1_diagonal(square) == get_a8h1_diagonal(attack)) {
            int n;
            for (n=square;n<=(attack);n+=7)
               connecting_ray[square][attack] |= make_bitboard_square(n);
         }
         connecting_ray[attack][square] = connecting_ray[square][attack];
      }
   }

   /* Attack zone around the king */
   for (square = A1; square <= H8; square++) {
      king_zone[square] = make_bitboard_square(square);
      king_zone[square] |= (king_zone[square] & board_minus_a) >> 1;
      king_zone[square] |= (king_zone[square] & board_minus_h) << 1;
      king_zone[square] |= king_zone[square] >> 8;
      king_zone[square] |= king_zone[square] << 8;
   }
}



static void alloc_large_slider_tables(void)
{
   size_t file_table_size;
   size_t rank_table_size;
   size_t file_table_start;
   size_t rank_table_start;
   size_t file_table_offset;
   size_t rank_table_offset;
   size_t offset = 0;
   uint8_t *memory;
   int n;

   /* Determine size of tables: rank attacks, so index by file */
   rank_table_size  = large_board_files*sizeof(large_bitboard_t *);
   if (rank_table_size & 127) rank_table_size += 128 - (rank_table_size & 127);
   rank_table_start = rank_table_size;
   rank_table_size += large_board_files*(1<<large_board_files)*sizeof(large_bitboard_t);
   rank_table_size += 128;
   rank_table_offset = (1<<large_board_files)*sizeof(large_bitboard_t);

   /* Determine size of tables: file attacks, so index by rank */
   file_table_size  = large_board_ranks*sizeof(large_bitboard_t *);
   if (file_table_size & 127) file_table_size += 128 - (file_table_size & 127);
   file_table_start = file_table_size;
   file_table_size += large_board_ranks*(1<<large_board_ranks)*sizeof(large_bitboard_t);
   file_table_size += 128;
   file_table_offset = (1<<large_board_ranks)*sizeof(large_bitboard_t);

   /* Allocate tables for horizontal (rank) attacks */
   memory = realloc(large_hmslider, rank_table_size);
   assert(memory);
   memset(memory, 0, rank_table_size);
   large_hmslider = (large_bitboard_t **)memory;
   /* It's still possible for the bitboard table to not start on a word boundary, we need to correct for that */
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+rank_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + rank_table_start) & 127);
#endif
   //printf("Base: %p %p %08x\n", memory, memory + rank_table_start, (int)rank_table_offset);
   for(n = 0; n<large_board_files; n++) {
      large_hmslider[n] = (large_bitboard_t *)(memory + offset + rank_table_start + n*rank_table_offset);
      //printf("[%2d]: %p\n", n, large_hmslider+n);
      //printf("      %p\n", large_hmslider[n]);
   }

   memory = realloc(large_hmhopper, rank_table_size);
   assert(memory);
   memset(memory, 0, rank_table_size);
   large_hmhopper = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+rank_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + rank_table_start) & 127);
#endif
   for(n = 0; n<large_board_files; n++) {
      large_hmhopper[n] = (large_bitboard_t *)(memory + offset + rank_table_start + n*rank_table_offset);
   }

   memory = realloc(large_hcslider, rank_table_size);
   assert(memory);
   memset(memory, 0, rank_table_size);
   large_hcslider = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+rank_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + rank_table_start) & 127);
#endif
   for(n = 0; n<large_board_files; n++) {
      large_hcslider[n] = (large_bitboard_t *)(memory + offset + rank_table_start + n*rank_table_offset);
   }

   memory = realloc(large_hchopper, rank_table_size);
   assert(memory);
   memset(memory, 0, rank_table_size);
   large_hchopper = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+rank_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + rank_table_start) & 127);
#endif
   for(n = 0; n<large_board_files; n++) {
      large_hchopper[n] = (large_bitboard_t *)(memory + offset + rank_table_start + n*rank_table_offset);
   }

   /* Allocate tables for vertical (file) attacks */
   memory = realloc(large_vmslider, file_table_size);
   assert(memory);
   memset(memory, 0, file_table_size);
   large_vmslider = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+file_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + file_table_start) & 127);
#endif
   for(n = 0; n<large_board_ranks; n++) {
      large_vmslider[n] = (large_bitboard_t *)(memory + offset + file_table_start + n*file_table_offset);
   }

   memory = realloc(large_vmhopper, file_table_size);
   assert(memory);
   memset(memory, 0, file_table_size);
   large_vmhopper = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+file_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + file_table_start) & 127);
#endif
   for(n = 0; n<large_board_ranks; n++) {
      large_vmhopper[n] = (large_bitboard_t *)(memory + offset + file_table_start + n*file_table_offset);
   }

   memory = realloc(large_vcslider, file_table_size);
   assert(memory);
   memset(memory, 0, file_table_size);
   large_vcslider = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+file_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + file_table_start) & 127);
#endif
   for(n = 0; n<large_board_ranks; n++) {
      large_vcslider[n] = (large_bitboard_t *)(memory + offset + file_table_start + n*file_table_offset);
   }

   memory = realloc(large_vchopper, file_table_size);
   assert(memory);
   memset(memory, 0, file_table_size);
   large_vchopper = (large_bitboard_t **)memory;
#if defined __i386__
   offset = 0;
   if ((uint64_t)(memory+file_table_start)&127) offset = 128 - (size_t)((uint64_t)(memory + file_table_start) & 127);
#endif
   for(n = 0; n<large_board_ranks; n++) {
      large_vchopper[n] = (large_bitboard_t *)(memory + offset + file_table_start + n*file_table_offset);
   }
}



void initialise_large_slider_tables(void)
{
   int board_size = large_board_files * large_board_ranks;
   int file, rank, square;
   int occ_mask, occ;
   int n;

   /* Allocate memory for tables */
   alloc_large_slider_tables();

   /* Make sure large tables are initialised when pieces are defined */
   use_large_tables = true;

   /* Rank attacks */
   for (file = 0; file<large_board_files; file++) {
      for (occ = 0; occ < 1<<large_board_files; occ++) {
         /* Left of slider position, rook and cannon moves, rook
          * attacks.
          */
         for (n=file-1; n>=0; n--) {
            if ( occ & (1 << n) ) {
               large_hcslider[file][occ] |= large_square_bitboards[n];
               break;
            }

            large_hmslider[file][occ] |= large_square_bitboards[n];
         }
         n--;
         /* Cannon attacks */
         for (; n>=0; n--) {
            if ( occ & (1 << n) ) {
               large_hchopper[file][occ] |= large_square_bitboards[n];
               break;
            }
            large_hmhopper[file][occ] |= large_square_bitboards[n];
         }

         /* Right of slider position */
         for (n=file+1; n<large_board_files; n++) {
            if ( occ & (1 << n) ) {
               large_hcslider[file][occ] |= large_square_bitboards[n];
               break;
            }

            large_hmslider[file][occ] |= large_square_bitboards[n];
         }
         n++;
         /* Cannon attacks */
         for (; n<large_board_files; n++) {
            if ( occ & (1 << n) ) {
               large_hchopper[file][occ] |= large_square_bitboards[n];
               break;
            }
            large_hmhopper[file][occ] |= large_square_bitboards[n];
         }
      }
   }

   /* File attacks */
   for (rank = 0; rank < large_board_ranks; rank++) {
      for (occ = 0; occ < 1<<large_board_ranks; occ++) {
         /* South of slider position, rook and cannon moves, rook
          * attacks.
          */
         for (n=rank-1; n>=0; n--) {
            if ( occ & (1 << n) ) {
               large_vcslider[rank][occ] |= large_square_bitboards[large_board_files*n];
               break;
            }

            large_vmslider[rank][occ] |= large_square_bitboards[large_board_files*n];
         }
         n--;
         /* Cannon attacks */
         for (; n>=0; n--) {
            if ( occ & (1 << n) ) {
               large_vchopper[rank][occ] |= large_square_bitboards[large_board_files*n];
               break;
            }
            large_vmhopper[rank][occ] |= large_square_bitboards[large_board_files*n];
         }

         /* North of slider position */
         for (n=rank+1; n<large_board_ranks; n++) {
            if ( occ & (1 << n) ) {
               large_vcslider[rank][occ] |= large_square_bitboards[large_board_files*n];
               break;
            }

            large_vmslider[rank][occ] |= large_square_bitboards[large_board_files*n];
         }
         n++;
         /* Cannon attacks */
         for (; n<large_board_ranks; n++) {
            if ( occ & (1 << n) ) {
               large_vchopper[rank][occ] |= large_square_bitboards[large_board_files*n];
               break;
            }
            large_vmhopper[rank][occ] |= large_square_bitboards[large_board_files*n];
         }
      }
   }

   /* Include the sentinel square (the capture location) in the move destinations. This will be masked by the
    * move generator anyway and it will allow us a small optimisation by skipping a separate capture
    * generation for pieces that capture as they move (which is normal).
    */
   for (file = 0; file<large_board_files; file++) {
      for (occ = 0; occ < 1<<large_board_files; occ++) {
         large_hmslider[file][occ] |= large_hcslider[file][occ];
         large_hmhopper[file][occ] |= large_hchopper[file][occ];
      }
   }
   for (rank = 0; rank < large_board_ranks; rank++) {
      for (occ = 0; occ < 1<<large_board_ranks; occ++) {
         large_vmslider[rank][occ] |= large_vcslider[rank][occ];
         large_vmhopper[rank][occ] |= large_vchopper[rank][occ];
      }
   }

   /* Initialise superpiece attacks to a full board */
   for (n=0; n<board_size; n++)
      large_super[n] = large_board_all;

   /* Connecting rays */
   memset(large_connecting_ray, 0, sizeof large_connecting_ray);
   for (square = 0; square < board_size; square++) {
      int attack;
      for (attack = square+1; attack<=board_size; attack++) {
         int row = unpack_rank(square);
         int file = unpack_file(square);
         if (row == unpack_rank(attack)) {
            int n;
            for (n=file;n<=unpack_file(attack);n++)
               large_connecting_ray[square][attack] |=
                  large_square_bitboards[pack_row_file(row, n)];
         }
         if (file == unpack_file(attack)) {
            int n;
            for (n=row;n<=unpack_rank(attack);n++)
               large_connecting_ray[square][attack] |=
                  large_square_bitboards[pack_row_file(n, file)];
         }
         if (large_diagonal_nr[square] == large_diagonal_nr[attack]) {
            int n;
            for (n=square;n<=(attack);n+=large_board_files+1)
               large_connecting_ray[square][attack] |= large_square_bitboards[n];
         }
         if (large_anti_diagonal_nr[square] == large_anti_diagonal_nr[attack]) {
            int n;
            for (n=square;n<=(attack);n+=large_board_files-1)
               large_connecting_ray[square][attack] |= large_square_bitboards[n];
         }
         large_connecting_ray[attack][square] = large_connecting_ray[square][attack];
      }
   }

   /* Bitshifts for steppers */
   /* Bitshifts for all directions: N   NE  E   SE    S   SW    W   NW */
   large_step_shift[0] = large_board_files;        // N
   large_step_shift[1] = large_board_files+1;      // NE
   large_step_shift[2] = 1;                        // E
   large_step_shift[3] =-large_board_files+1;      // SE
   large_step_shift[4] =-large_board_files;        // S
   large_step_shift[5] =-large_board_files-1;      // SW
   large_step_shift[6] =-1;                        // W
   large_step_shift[7] = large_board_files-1;      // NW

   large_step_mask[0] = large_board_all;
   large_step_mask[1] = ~large_board_east_edge;
   large_step_mask[2] = ~large_board_east_edge;
   large_step_mask[3] = ~large_board_east_edge;
   large_step_mask[4] = large_board_all;
   large_step_mask[5] = ~large_board_west_edge;
   large_step_mask[6] = ~large_board_west_edge;
   large_step_mask[7] = ~large_board_west_edge;
}



/* Look up an already defined leaper by its step.
 * The step matches in either order, so (n,m) also finds a leaper stored as (m,n).
 * Returns the slot number of the first match, or -1 if there is none.
 */
static int find_leaper_description(int n, int m)
{
   int slot;

   for (slot = 0; slot < number_of_leapers; slot++) {
      const int encoded = leaper_description[slot];

      if (encoded == (n | (m<<4)) || encoded == (m | (n<<4)))
         return slot;
   }

   return -1;
}



/* Helper macro: mark all valid destinations for an (n, m) leaper.
 * For the piece on (x, y) / square `sqr`, the four destinations (x +/- n, y +/- m) that lie on the
 * board (as given by large_board_files and large_board_ranks) are added to slot number_of_leapers of
 * the leaper tables: the 8x8 table if sqr < 64, the large table if use_large_tables is set.
 * The caller must have int variables x, y, dx, dy and sqr in scope.
 * The arguments are parenthesised, so expressions can be passed safely.
 */
#define update_leaper(n,m)\
   for (dy = -1; dy<=1; dy+=2) {                                              \
      for (dx = -1; dx<=1; dx+=2) {                                           \
         int dest_x = x + dx*(n);                                             \
         int dest_y = y + dy*(m);                                             \
         if (dest_x < 0 || dest_y < 0) continue;                              \
         if (dest_x < large_board_files && dest_y < large_board_ranks) {      \
            int dest_sqr = pack_row_file(dest_y, dest_x);                     \
            if (sqr < 64)                                                     \
               leapers[number_of_leapers][sqr] |= make_bitboard_square(dest_sqr);\
            if (use_large_tables)                                             \
               large_leapers[number_of_leapers][sqr] |= large_square_bitboards[dest_sqr];\
         }                                                                    \
      }                                                                       \
   }

/* Helper macro: mark the single destination (x+n, y+m) of an asymmetric leaper step.
 * If the destination lies on the board (as given by large_board_files and large_board_ranks) it is
 * added to slot number_of_leapers of `leapers` (when sqr < 64) and of `large_leapers` (when
 * use_large_tables is set). An off-board destination is simply ignored: the macro does not alter the
 * control flow of the caller, so it can be used several times in a row within one loop body.
 * The caller must have int variables x, y and sqr in scope.
 */
#define update_aleaper(leapers,large_leapers,n,m)\
   do {                                                                       \
      int dest_x = x + (n);                                                   \
      int dest_y = y + (m);                                                   \
      if (dest_x >= 0 && dest_y >= 0 &&                                       \
          dest_x < large_board_files && dest_y < large_board_ranks) {         \
         int dest_sqr = pack_row_file(dest_y, dest_x);                        \
         if (sqr < 64)                                                        \
            leapers[number_of_leapers][sqr] |= make_bitboard_square(dest_sqr);\
         if (use_large_tables)                                                \
            large_leapers[number_of_leapers][sqr] |= large_square_bitboards[dest_sqr];\
      }                                                                       \
   } while (0)

/* Define an asymmetric leaper from a description string.
 *
 * The string is scanned for successive "(n,m)" steps. The character that follows the closing
 * parenthesis of a step is the operator that applies to the next step:
 *   '|'  another alternative move of the same leaper,
 *   '+'  a second step made after the first one,
 *   '&'  a mask (used for "lame leapers").
 * Each step (n,m) is entered as-is in the leapers/large_leapers tables and with the sign of m
 * reversed in the reverse_leapers/large_reverse_leapers tables.
 *
 * Returns the move flags: MF_LEAPER_ASYMM combined with, in the upper 16 bits, the slot of the first
 * step (4 bits), the slot of the second step (4 bits), the slot of the mask (4 bits) and a bit field
 * telling which of those are present (1, 2 and 4 respectively).
 * Returns -1 if the leaper table is full.
 *
 * NOTE(review): movestr is assumed to be non-NULL and non-empty (the first character is skipped
 * unconditionally), and the results of sscanf() are not checked, so n and m are indeterminate when a
 * step does not contain two numbers.
 */
static move_flag_t define_asymmetric_leaper(const char *movestr)
{
   /* Clear the board */
   memset(&leapers[number_of_leapers][0], 0, 64*sizeof(bitboard_t));
   memset(&large_leapers[number_of_leapers][0], 0, 128*sizeof(large_bitboard_t));
   memset(&reverse_leapers[number_of_leapers][0], 0, 64*sizeof(bitboard_t));
   memset(&large_reverse_leapers[number_of_leapers][0], 0, 128*sizeof(large_bitboard_t));

   const char *s = movestr;
   char op = ' ';    /* Operator in front of the step being parsed; ' ' for the very first step */

   int index, index2, indexm, index_flags;
   index = index2 = indexm = index_flags = 0;
   uint8_t description = 0;   /* Never used below */

   /* Start at the second character so that s[-1] is always a valid access */
   s++;
   while (*s) {
      int n, m;

      /* Advance to the character following the next '(' and read n */
      while (*s && s[-1] != '(') s++;
      if(!*s) break;
      sscanf(s, "%d", &n);

      /* Advance to the character following the next ',' and read m */
      while (*s && s[-1] != ',') s++;
      if(!*s) break;
      sscanf(s, "%d", &m); s++;

      switch (op) {
         case '|':   /* Define a leaper with more than one type of move */
         case ' ':
            leaper_description[number_of_leapers] = n | (m<<4);
            index = number_of_leapers;
            index_flags |= 1;
            break;

         case '+':   /* A compound leaper, with two steps one after the other */
            /* Define a new type of leaper for the second step.
             * FIXME: check if this type was already defined and re-use.
             */
            /* If no matching leaper exists, move on to the next slot; the table update below then
             * writes into that new slot because it is indexed by number_of_leapers.
             */
            index2 = find_leaper_description(n, m);
            if (index2 == -1) {
               number_of_leapers++;
               if (number_of_leapers >= MAX_LEAPER_TYPES)
                  return -1;
               index2 = number_of_leapers;
               leaper_description[number_of_leapers] = n | (m<<4);
            }
            index_flags |= 2;
            break;

         case '&':   /* A compound leaper, with a mask (used to implement "lame leapers") */
            /* Define a new type of leaper for the mask.
             * FIXME: check if this type was already defined and re-use.
             */
            /* Same slot handling as for '+' above */
            indexm = find_leaper_description(n, m);
            if (indexm == -1) {
               number_of_leapers++;
               if (number_of_leapers >= MAX_LEAPER_TYPES)
                  return -1;
               indexm = number_of_leapers;
               leaper_description[number_of_leapers] = n | (m<<4);
            }
            index_flags |= 4;
            break;
      }

      /* A leaper with several alternative moves cannot be described by a single (n,m) pair */
      if (op == '|')
         leaper_description[number_of_leapers] = 0xff; /* Invalid */

      /* Dimensions of the board */
      int w = 8;
      int h = 8;
      if (use_large_tables) {
         w = large_board_files;
         h = large_board_ranks;
      }

      /* Add the step to the tables for every square of the board (dx and dy are not used here) */
      int x, y, dx, dy;
      for (y=0; y<h; y++) {
         for (x=0; x<w; x++) {
            int sqr = x + y*w;
            /* White */
            update_aleaper(leapers, large_leapers, n, m);

            /* Black */
            update_aleaper(reverse_leapers, large_reverse_leapers, n, -m);
         }
      }

      /* Skip one more character and any closing parentheses; what follows is the next operator */
      s++;
      while (*s && *s == ')') s++;
      op = s[0];
   }

   /* The slot that was filled last is now taken */
   number_of_leapers++;

   return (index | (index2 << 4) | (indexm << 8) | (index_flags << 12))<<16 | MF_LEAPER_ASYMM;
}

/* Define a leaper mask.
 * Looks for the first '&' in movestr and parses the "(n,m)" step that follows it. A symmetric (n,m)
 * leaper with that step is looked up, or created in a new slot if it does not exist yet.
 *
 * Returns (slot << 24) | MF_LEAPER_HAVEM on success, 0 if movestr is NULL/empty, contains no mask or
 * the mask step cannot be parsed, and -1 if a new slot is needed but the leaper table is full.
 *
 * NOTE(review): a newly created slot is filled with |=, so it is assumed to be zero already - confirm
 * that the caller guarantees this.
 */
static move_flag_t define_leaper_mask(const char *movestr)
{
   if (!movestr || !*movestr) return 0;

   const char *s = movestr;
   int n, m;

   /* Find start of the mask description */
   while (*s && *s != '&') s++;
   if (!*s) return 0;
   s++;

   /* Find leaper step: n follows the '(' ... */
   while (*s && s[-1] != '(') s++;
   if (!*s) return 0;
   if (sscanf(s, "%d", &n) != 1) return 0;

   /* ... and m follows the ',' */
   while (*s && s[-1] != ',') s++;
   if (!*s) return 0;
   if (sscanf(s, "%d", &m) != 1) return 0;

   /* Check if a leaper with this move has already been defined */
   int index = find_leaper_description(n, m);

   /* If the leaper doesn't exist yet, create it.
    * Otherwise reuse it.
    */
   if (index == -1) {
      if (number_of_leapers >= MAX_LEAPER_TYPES)
         return -1;
      index = number_of_leapers;
      leaper_description[number_of_leapers] = n | (m<<4);

      /* x, y, dx, dy and sqr are used by the update_leaper macro */
      int x, y, dx, dy;
      for (y=0; y<large_board_ranks; y++) {
         for (x=0; x<large_board_files; x++) {
            int sqr = x + y*large_board_files;
            update_leaper(n, m);
            update_leaper(m, n);
         }
      }

      number_of_leapers++;
   }

   return (index << 24) | MF_LEAPER_HAVEM;
}

/* Define the second step in a two-step leaper.
 * Looks for the first '+' in movestr and parses the "(n,m)" step that follows it. A symmetric (n,m)
 * leaper with that step is looked up, or created in a new slot if it does not exist yet.
 *
 * Returns (slot << 20) | MF_LEAPER_HAVE2 on success, 0 if movestr is NULL/empty, contains no second
 * step or the step cannot be parsed, and -1 if a new slot is needed but the leaper table is full.
 *
 * NOTE(review): a newly created slot is filled with |=, so it is assumed to be zero already - confirm
 * that the caller guarantees this.
 */
static move_flag_t define_second_leaper_step(const char *movestr)
{
   if (!movestr || !*movestr) return 0;

   const char *s = movestr;
   int n, m;

   /* Find start of the second leaper description */
   while (*s && *s != '+') s++;
   if (!*s) return 0;
   s++;

   /* FIXME: we now assume that the second step has to be simple. This isn't necessary */

   /* Find leaper step: n follows the '(' ... */
   while (*s && s[-1] != '(') s++;
   if (!*s) return 0;
   if (sscanf(s, "%d", &n) != 1) return 0;

   /* ... and m follows the ',' */
   while (*s && s[-1] != ',') s++;
   if (!*s) return 0;
   if (sscanf(s, "%d", &m) != 1) return 0;

   /* Check if a leaper with this move has already been defined */
   int index = find_leaper_description(n, m);

   /* If the leaper doesn't exist yet, create it.
    * Otherwise reuse it.
    */
   if (index == -1) {
      if (number_of_leapers >= MAX_LEAPER_TYPES)
         return -1;
      index = number_of_leapers;
      leaper_description[number_of_leapers] = n | (m<<4);

      /* x, y, dx, dy and sqr are used by the update_leaper macro */
      int x, y, dx, dy;
      for (y=0; y<large_board_ranks; y++) {
         for (x=0; x<large_board_files; x++) {
            int sqr = x + y*large_board_files;
            update_leaper(n, m);
            update_leaper(m, n);
         }
      }

      number_of_leapers++;
   }

   return (index << 20) | MF_LEAPER_HAVE2;
}

/* Define a simple leaper.
 * Parses the first "(n,m)" step in movestr. A symmetric (n,m) leaper with that step is looked up, or
 * created in a new slot if it does not exist yet.
 *
 * Returns (slot << 16) | MF_IS_LEAPER on success, 0 if movestr is NULL/empty or the step cannot be
 * parsed, and -1 if a new slot is needed but the leaper table is full.
 *
 * NOTE(review): the scan inspects s[-1], so when movestr does not start with '(' the character in
 * front of movestr is read; the visible caller passes a pointer into the middle of a longer string.
 */
static move_flag_t define_simple_leaper(const char *movestr)
{
   if (!movestr || !*movestr) return 0;

   const char *s = movestr;
   int n, m;

   /* Find leaper step: skip any opening parentheses, n follows the last '(' ... */
   while (*s && s[0] == '(') s++;
   while (*s && s[-1] != '(') s++;
   if (!*s) return 0;
   if (sscanf(s, "%d", &n) != 1) return 0;

   /* ... and m follows the ',' */
   while (*s && s[-1] != ',') s++;
   if (!*s) return 0;
   if (sscanf(s, "%d", &m) != 1) return 0;

   /* Check if a leaper with this move has already been defined */
   int index = find_leaper_description(n, m);

   /* If the leaper doesn't exist yet, create it.
    * Otherwise reuse it.
    */
   if (index == -1) {
      if (number_of_leapers >= MAX_LEAPER_TYPES)
         return -1;
      index = number_of_leapers;
      leaper_description[number_of_leapers] = n | (m<<4);

      /* x, y, dx, dy and sqr are used by the update_leaper macro */
      int x, y, dx, dy;
      for (y=0; y<large_board_ranks; y++) {
         for (x=0; x<large_board_files; x++) {
            int sqr = x + y*large_board_files;
            update_leaper(n, m);
            update_leaper(m, n);
         }
      }

      number_of_leapers++;
   }

   return (index << 16) | MF_IS_LEAPER;
}

/* Define a non-trivial leaper (compound leaper).
 * movestr holds several "(n,m)" steps separated by operators. All steps up to the first one that is
 * introduced by '+' (second step) or '&' (mask) are combined into a single leaper, which always gets
 * a slot of its own; that slot is marked 0xff so it is never found when looking up a simple leaper.
 *
 * Returns (slot << 16) | MF_IS_LEAPER on success, 0 if movestr is NULL/empty and -1 if the leaper
 * table is full.
 */
static move_flag_t define_compound_leaper(const char *movestr)
{
   if (!movestr || !*movestr) return 0;

   /* We always allocate a new slot for these */
   if (number_of_leapers >= MAX_LEAPER_TYPES)
      return -1;
   int index = number_of_leapers;

   /* This slot can never be reused */
   leaper_description[number_of_leapers] = 0xff;

   const char *s = movestr;
   char op = ' ';    /* Operator in front of the step being parsed; ' ' for the very first step */
   while (*s) {
      int n, m;

      /* Read n: skip to the next digit, parse the number and step over all of its digits (a step
       * component may have more than one digit). isdigit() needs an unsigned char value.
       */
      while (*s && !isdigit((unsigned char)*s)) s++;
      if (!*s) break;
      if (sscanf(s, "%d", &n) != 1) break;
      while (isdigit((unsigned char)*s)) s++;

      /* Read m in the same way */
      while (*s && !isdigit((unsigned char)*s)) s++;
      if (!*s) break;
      if (sscanf(s, "%d", &m) != 1) break;
      while (isdigit((unsigned char)*s)) s++;

      /* A second step ('+') or a mask ('&') is not part of this leaper: we're done here. */
      if (op == '+' || op == '&')
         break;

      /* Dimensions of the board */
      int w = large_board_files;
      int h = large_board_ranks;

      /* x, y, dx, dy and sqr are used by the update_leaper macro */
      int x, y, dx, dy;
      for (y=0; y<h; y++) {
         for (x=0; x<w; x++) {
            int sqr = x + y*w;
            update_leaper(n, m);
            update_leaper(m, n);
         }
      }

      /* Skip the character after the step (normally the closing parenthesis) and any further closing
       * parentheses, without running past the end of the string. What follows is the next operator.
       */
      if (*s) s++;
      while (*s == ')') s++;
      op = *s;
   }

   number_of_leapers++;
   return (index << 16) | MF_IS_LEAPER;
}


/* Define a leaper from a move description string.
 *
 * Descriptions that start with "aleap" are handed to
 * define_asymmetric_leaper(). For all others the table entries of the next
 * free leaper slot are cleared, the five-character keyword prefix is skipped
 * and the remainder is parsed as a compound leaper (if it contains '|') or a
 * simple leaper; a second step ('+') and a mask ('&') are added if present.
 *
 * Returns the combined move flags, or -1 if movestr is NULL or no leaper slot
 * is available.
 */
move_flag_t define_leaper(const char *movestr)
{
   int skip;

   if (number_of_leapers >= MAX_LEAPER_TYPES)
      return -1;

   if (!movestr)
      return -1;

   /* Asymmetric? */
   if (strncmp(movestr, "aleap", 5) == 0)
      return define_asymmetric_leaper(movestr);

   /* Clear the board */
   memset(&leapers[number_of_leapers][0], 0, 64*sizeof(bitboard_t));
   memset(&large_leapers[number_of_leapers][0], 0, 128*sizeof(large_bitboard_t));

   move_flag_t flags = 0;

   /* Skip the keyword and the separator behind it (five characters), but stop
    * at the terminator so that a short string such as "leap" does not make us
    * read beyond its end.
    */
   for (skip=0; skip<5 && *movestr; skip++)
      movestr++;

   if (strchr(movestr, '|'))
      flags |= define_compound_leaper(movestr);
   else
      flags |= define_simple_leaper(movestr);

   if (strchr(movestr, '+'))
      flags |= define_second_leaper_step(movestr);

   if (strchr(movestr, '&'))
      flags |= define_leaper_mask(movestr);

   return flags;
}

/* Translate a slider/hopper description into move flags.
 *
 * The string is scanned character by character. The letters H, V, D and A add
 * the corresponding MF_SLIDER_* flag; after the keyword "hop" the flags are
 * shifted left by 4 bits, after the keyword "slide" or a closing ')' they are
 * not shifted.
 *
 * Returns the collected flags, or 0 if movestr is NULL or contains only
 * whitespace.
 */
move_flag_t define_slider(const char *movestr)
{
   const char *s = movestr;
   move_flag_t flags = 0;
   int shift = 0;

   if (!s)
      return 0;

   /* <ctype.h> functions need a value in the range of unsigned char */
   while (*s && isspace((unsigned char)*s)) s++;

   if (*s == '\0')
      return 0;

   while (*s) {
      /* Keyword at the current position? Only look at the prefix, there is no
       * need to search the rest of the string.
       */
      if (strncmp(s, "slide", 5) == 0)
         shift = 0;

      if (strncmp(s, "hop", 3) == 0)
         shift = 4;

      switch (*s) {
         case 'H':
            flags |= MF_SLIDER_H << shift;
            break;
         case 'V':
            flags |= MF_SLIDER_V << shift;
            break;
         case 'D':
            flags |= MF_SLIDER_D << shift;
            break;
         case 'A':
            flags |= MF_SLIDER_A << shift;
            break;
         case ')':
            /* End of a group: back to plain sliders */
            shift = 0;
            break;
         default:
            break;
      }
      s++;
   }

   return flags;
}

/* Register a new stepper from a description of the form
 * "step <count><direction>, <count><direction>, ...".
 *
 * Each comma separated entry contributes a repeat count (a single leading
 * digit, 1 if absent) for one of the eight compass directions; the counts are
 * packed three bits per direction into stepper_description[]. The per-square
 * step tables of the new stepper are then filled in.
 *
 * Returns the stepper index shifted left by 8 bits, or 0 if there is no free
 * stepper slot, movestr is NULL or blank, or it does not start with "step ".
 */
move_flag_t define_stepper(const char *movestr)
{
   /* Direction names with the bit offset of their counter. Two-letter names
    * come first so that "NE" is not mistaken for "N".
    */
   static const struct {
      const char *name;
      int offset;
   } direction[8] = {
      { "NE",  3 }, { "NW", 21 }, { "SE",  9 }, { "SW", 15 },
      { "N",   0 }, { "E",   6 }, { "S",  12 }, { "W",  18 }
   };
   const char *cursor = movestr;

   if (number_of_steppers >= MAX_STEPPER_TYPES)
      return 0;

   if (!movestr)
      return 0;

   /* Dimensions of the board */
   int files = 8;
   int ranks = 8;
   if (use_large_tables) {
      files = large_board_files;
      ranks = large_board_ranks;
   }

   while (*cursor && isspace(*cursor)) cursor++;

   if (*cursor == '\0')
      return 0;

   if (strncmp(cursor, "step ", 5) != 0)
      return 0;
   cursor += 5;

   const int slot = number_of_steppers;

   stepper_description[slot] = 0;
   while (*cursor) {
      int repeat = 1;
      int offset = 0;
      int k;

      /* Optional repeat count in front of the direction */
      if (isdigit(*cursor)) {
         sscanf(cursor, "%d", &repeat);
         assert(repeat < 8);
         assert(repeat >= 0);
         cursor++;
      }

      /* First matching direction name wins; unknown names count as "N" */
      for (k=0; k<8; k++) {
         if (strncmp(cursor, direction[k].name, strlen(direction[k].name)) == 0) {
            offset = direction[k].offset;
            break;
         }
      }

      stepper_description[slot] |= repeat << offset;

      /* Advance to the start of the next entry */
      while (*cursor && *cursor != ',')
         cursor++;
      if (*cursor) cursor++;
      while (*cursor && isspace(*cursor)) cursor++;
   }

   /* Calculate single step bitboards, mainly used for mobility calculation, since normal single steppers are
    * generated using bulk shifts.
    */
   int sq, dir;
   for (sq=0; sq<files*ranks; sq++) {
      const bool on_small_board = (sq < 64);

      if (on_small_board) stepper_step[slot][sq] = board_empty;
      large_stepper_step[slot][sq] = large_board_empty;

      for (dir=0; dir<8; dir++) {
         int left = (stepper_description[slot] >> (dir*3)) & 7;
         bitboard_t reach = board_empty;
         large_bitboard_t large_reach = large_square_bitboards[sq];
         if (on_small_board) reach = make_bitboard_square(sq);

         while (left > 0) {
            reach &= step_mask[dir];
            reach = sshift64(reach, step_shift[dir]);
            if (on_small_board) stepper_step[slot][sq] |= reach;

            large_reach &= large_step_mask[dir];
            large_reach = sshift128(large_reach, large_step_shift[dir]);
            large_stepper_step[slot][sq] |= large_reach;

            left--;
         }
      }
   }

   number_of_steppers++;

   return slot << 8;
}

/* Dispatch a move description to the parser for its move type.
 *
 * Leading whitespace is skipped and the keyword at the start of the string
 * selects the parser: "slide"/"hop" -> define_slider(), "step" ->
 * define_stepper(), "aleap"/"leap" -> define_leaper().
 *
 * Returns the flags produced by that parser, or 0 for NULL, blank, "none" and
 * unrecognised descriptions.
 */
move_flag_t define_piece_move(const char *movestr)
{
   const char *text = movestr;

   if (!text) return 0;
   while (isspace(*text)) text++;
   if (*text == '\0') return 0;

   /* What type of mover is this? */
   if (strncmp(text, "none", 4) == 0)
      return 0;

   if (strncmp(text, "slide", 5) == 0 || strncmp(text, "hop", 3) == 0)
      return define_slider(text);

   if (strncmp(text, "step", 4) == 0)
      return define_stepper(text);

   if (strncmp(text, "aleap", 5) == 0 || strncmp(text, "leap", 4) == 0)
      return define_leaper(text);

   return 0;
}

/* Build the attack tables of the "super piece": for every square the union of
 * the rank, file, diagonal and anti-diagonal attacks on an empty board and of
 * all leaper tables defined so far, clipped to the board.
 * Steppers are not added separately; sliders are already taken into account
 * and that automatically covers steppers.
 * The large-board table is only filled in when use_large_tables is set.
 */
void initialise_super_tables(void)
{
   int sq, type;

   for (sq=0; sq<64; sq++) {
      super[sq]  = get_rank_attacks(hmslider, 0, sq);
      super[sq] |= get_file_attacks(hmslider, 0, sq);
      super[sq] |= get_diagonal_attacks(hmslider, 0, sq);
      super[sq] |= get_antidiagonal_attacks(hmslider, 0, sq);

      for (type=0; type<number_of_leapers; type++)
         super[sq] |= leapers[type][sq];

      super[sq] &= board_all;
   }

   if (!use_large_tables)
      return;

   for (sq=0; sq<large_board_files*large_board_ranks; sq++) {
      large_super[sq]  = get_large_rank_attacks(large_hmslider, large_board_empty, sq);
      large_super[sq] |= get_large_file_attacks(large_vmslider, large_board_empty, sq);
      large_super[sq] |= get_large_diagonal_attacks(large_hmslider, large_board_empty, sq);
      large_super[sq] |= get_large_antidiagonal_attacks(large_hmslider, large_board_empty, sq);

      for (type=0; type<number_of_leapers; type++)
         large_super[sq] |= large_leapers[type][sq];

      large_super[sq] &= large_board_all;
   }
}

/* Apply a mask to a leaper (so we can make "trapped leapers"). This is a bit spotty at the moment in the
 * sense that it only works reliably for leapers that already have a mask.
 * (We're not constraining the first steps directly because we're likely to reuse the flags for that; the
 * correct implementation for unconstrained leapers is to add a mask to their description).
 *
 * A new leaper slot is always claimed to hold the result:
 *  - if the leaper already has a mask (MF_LEAPER_HAVEM), the new slot holds the old mask restricted to
 *    `mask` and replaces it in the flags;
 *  - if it has a second step (MF_LEAPER_HAVE2) but no mask, `mask` itself becomes the mask table;
 *  - otherwise the first-step table (and the reverse table for asymmetric leapers) is copied with
 *    `mask` applied and the flags are redirected to the copy.
 *
 * Returns the updated flags, or -1 if no leaper slot is free.
 */
move_flag_t set_leaper_mask(move_flag_t flags, bitboard_t mask)
{
   int index;
   int square;

   if (number_of_leapers >= MAX_LEAPER_TYPES) return -1;

   /* leapers[][] and reverse_leapers[][] hold 64 squares per leaper type, so none of the loops below
    * may run beyond square 63.
    */
   if (flags & MF_LEAPER_HAVEM) {
      /* Restrict the existing mask */
      index = (flags >> 24) & 0xf;
      for (square=0; square<64; square++)
         leapers[number_of_leapers][square] = leapers[index][square] & mask;
      flags &= ~MF_LEAPER_MASK;
      flags |= MF_LEAPER_HAVEM | (number_of_leapers << 24);
   } else if (flags & MF_LEAPER_HAVE2) {
      /* Add a mask */
      int squares = large_board_files*large_board_ranks;
      if (squares > 64) squares = 64;
      for (square=0; square<squares; square++)
         leapers[number_of_leapers][square] = mask;
      flags |= MF_LEAPER_HAVEM | (number_of_leapers << 24);
   } else {
      /* Apply restriction to the move definition itself */
      index = (flags >> 16) & 0xf;
      for (square=0; square<64; square++) {
         leapers[number_of_leapers][square] = leapers[index][square] & mask;
         if (flags & MF_LEAPER_ASYMM)
            reverse_leapers[number_of_leapers][square] = reverse_leapers[index][square] & mask;
      }
      flags &= ~MF_LEAPER;
      flags |= MF_IS_LEAPER | (number_of_leapers << 16);
   }

   /* Allocate a new index for the mask */
   leaper_description[number_of_leapers] = 0xff; /* Invalid */
   number_of_leapers++;

   return flags;
}

/* Large-board counterpart of set_leaper_mask(): restrict a leaper to the squares in `mask`.
 *
 * A new leaper slot always receives the result. Depending on the flags this is a narrowed copy of the
 * existing mask table (MF_LEAPER_HAVEM), `mask` itself as a new mask table (MF_LEAPER_HAVE2 without
 * mask), or a masked copy of the first-step table - plus the reverse table for asymmetric leapers.
 * As noted for the small-board version, this only works reliably for leapers that already have a mask.
 *
 * Returns the updated flags, or -1 if no leaper slot is free.
 */
move_flag_t set_large_leaper_mask(move_flag_t flags, large_bitboard_t mask)
{
   const bool has_mask   = (flags & MF_LEAPER_HAVEM) != 0;
   const bool has_second = (flags & MF_LEAPER_HAVE2) != 0;
   int source;
   int sq;

   if (number_of_leapers >= MAX_LEAPER_TYPES) return -1;

   if (has_mask) {
      /* Narrow the mask that is already there */
      source = (flags >> 24) & 0xf;
      for (sq=0; sq<128; sq++)
         large_leapers[number_of_leapers][sq] = large_leapers[source][sq] & mask;
      flags &= ~MF_LEAPER_MASK;
      flags |= MF_LEAPER_HAVEM | (number_of_leapers << 24);
   } else if (has_second) {
      /* Add a mask */
      for (sq=0; sq<large_board_files*large_board_ranks; sq++)
         large_leapers[number_of_leapers][sq] = mask;
      flags |= MF_LEAPER_HAVEM | (number_of_leapers << 24);
   } else {
      /* Apply restriction to the move definition itself */
      source = (flags >> 16) & 0xf;
      for (sq=0; sq<128; sq++)
         large_leapers[number_of_leapers][sq] = large_leapers[source][sq] & mask;
      if (flags & MF_LEAPER_ASYMM) {
         for (sq=0; sq<128; sq++)
            large_reverse_leapers[number_of_leapers][sq] = large_reverse_leapers[source][sq] & mask;
      }
      flags &= ~MF_LEAPER;
      flags |= MF_IS_LEAPER | (number_of_leapers << 16);
   }

   /* The slot that now holds the result can never be matched for reuse */
   leaper_description[number_of_leapers] = 0xff; /* Invalid */
   number_of_leapers++;

   return flags;
}



/* Deduce castle flags from king positions and destinations and rook locations.
 *
 * The castle is treated as short (king side) when the king's destination file lies in the upper half
 * of the files, as long otherwise; the rook ends up next to the king on the side facing the centre.
 * For the selected wing this fills in, for both the 64-bit and the large tables:
 *   *_castle_mask  the squares of the king and the rook
 *   *_castle_safe  the squares the king passes over
 *   *_castle_free  the squares passed over by king or rook, excluding the two pieces themselves
 * and records the king's destination square. The large_board argument is not used.
 */
void deduce_castle_flags(bool large_board, sides side, int king_from, int king_to, int rook_from)
{
   /* King-side or queen side? */
   const bool short_side = (unpack_file(king_to) >= large_board_files/2);
   const int rook_to = short_side ? (king_to - 1) : (king_to + 1);
   int sq, lo, hi;

   /* FIXME: it is not enough that the king and rook have a clear path
    * between them: the path to the destination squares needs to be cleared
    * as well.
    * This is implied in normal chess, but not in FRC.
    */
   if (short_side) {
      short_castle_mask[side]       = make_bitboard_square(king_from)   | make_bitboard_square(rook_from);
      large_short_castle_mask[side] = large_square_bitboards[king_from] | large_square_bitboards[rook_from];
      short_castle_free[side]       = board_empty;
      large_short_castle_free[side] = large_board_empty;

      /* The path of the King */
      lo = (king_from < king_to) ? king_from : king_to;
      hi = (king_from < king_to) ? king_to   : king_from;
      for (sq = lo; sq <= hi; sq++) {
         short_castle_free[side]       |= make_bitboard_square(sq);
         large_short_castle_free[side] |= large_square_bitboards[sq];
      }
      short_castle_safe[side]       = short_castle_free[side]       | make_bitboard_square(king_from);
      large_short_castle_safe[side] = large_short_castle_free[side] | large_square_bitboards[king_from];

      /* The path of the Rook */
      lo = (rook_to < rook_from) ? rook_to   : rook_from;
      hi = (rook_to < rook_from) ? rook_from : rook_to;
      for (sq = lo; sq <= hi; sq++) {
         short_castle_free[side]       |= make_bitboard_square(sq);
         large_short_castle_free[side] |= large_square_bitboards[sq];
      }

      /* Make sure the king and rook are not marked on the "free" bitboard.
       * Makes no difference for normal chess, but does affect FRC.
       */
      short_castle_free[side]       &= ~short_castle_mask[side];
      large_short_castle_free[side] &= ~large_short_castle_mask[side];
      short_castle_king_dest[side] = king_to;
      return;
   }

   long_castle_mask[side]       = make_bitboard_square(king_from)   | make_bitboard_square(rook_from);
   large_long_castle_mask[side] = large_square_bitboards[king_from] | large_square_bitboards[rook_from];
   long_castle_free[side]       = board_empty;
   large_long_castle_free[side] = large_board_empty;

   /* The path of the King */
   lo = (king_from < king_to) ? king_from : king_to;
   hi = (king_from < king_to) ? king_to   : king_from;
   for (sq = hi; sq >= lo; sq--) {
      long_castle_free[side]       |= make_bitboard_square(sq);
      large_long_castle_free[side] |= large_square_bitboards[sq];
   }
   long_castle_safe[side]       = long_castle_free[side];
   large_long_castle_safe[side] = large_long_castle_free[side];

   /* The path of the Rook, starting from the square next to it */
   lo = (rook_from+1 < rook_to) ? rook_from+1 : rook_to;
   hi = (rook_from+1 < rook_to) ? rook_to     : rook_from+1;
   for (sq = lo; sq <= hi; sq++) {
      long_castle_free[side]       |= make_bitboard_square(sq);
      large_long_castle_free[side] |= large_square_bitboards[sq];
   }

   /* Make sure the king and rook are not marked on the "free" bitboard.
    * Makes no difference for normal chess, but does affect FRC.
    */
   long_castle_free[side]       &= ~long_castle_mask[side];
   large_long_castle_free[side] &= ~large_long_castle_mask[side];
   long_castle_king_dest[side] = king_to;
}



/* Target squares of a (symmetric) leaper standing on `square`.
 *
 * The flags carry three 4-bit table indices: bits 16-19 select the first
 * step, bits 20-23 the optional second step (MF_LEAPER_HAVE2) and bits 24-27
 * the optional mask (MF_LEAPER_HAVEM). Second steps are only taken from
 * first-step squares that are not in `occ`. The result is not filtered for
 * occupied squares.
 */
bitboard_t get_leaper_moves(move_flag_t flags, bitboard_t occ, int square)
{
   const int first_step  = (flags >> 16) & 0xf;
   const int second_step = (flags >> 20) & 0xf;
   const int mask_table  = (flags >> 24) & 0xf;
   bitboard_t targets = leapers[first_step][square];

   /* Second step, for complicated leapers; only if the first step was free */
   if (flags & MF_LEAPER_HAVE2) {
      bitboard_t pending = targets & ~occ;

      while (pending) {
         int via = bitscan64(pending);
         pending ^= make_bitboard_square(via);
         targets |= leapers[second_step][via];
      }
   }

   /* The mask is looked up for the square the piece started from */
   if (flags & MF_LEAPER_HAVEM)
      targets &= leapers[mask_table][square];

   return targets;
}

/* Large-board version of get_leaper_moves(): target squares of a leaper on
 * `square`.
 *
 * Bits 16-19 of the flags select the first-step table, bits 20-23 the
 * second-step table (used when MF_LEAPER_HAVE2 is set, and only from
 * first-step squares not in `occ`) and bits 24-27 the mask table (used when
 * MF_LEAPER_HAVEM is set).
 */
large_bitboard_t get_large_leaper_moves(move_flag_t flags, large_bitboard_t occ, int square)
{
   const int first_step  = (flags >> 16) & 0xf;
   const int second_step = (flags >> 20) & 0xf;
   const int mask_table  = (flags >> 24) & 0xf;
   large_bitboard_t targets = large_board_empty;

   /* First step */
   targets |= large_leapers[first_step][square];

   /* Second step, for complicated leapers; only if the first step was free */
   if (flags & MF_LEAPER_HAVE2) {
      large_bitboard_t pending = targets & ~occ;

      while (!is_zero128(pending)) {
         int via = bitscan128(pending);
         pending ^= large_square_bitboards[via];
         targets |= large_leapers[second_step][via];
      }
   }

   /* The mask is looked up for the square the piece started from */
   if (flags & MF_LEAPER_HAVEM)
      targets &= large_leapers[mask_table][square];

   return targets;
}

/* Target squares of an asymmetric leaper standing on `square`.
 *
 * Works like get_leaper_moves(), except that the first step is taken from
 * leapers[] for WHITE and from reverse_leapers[] for any other side. Bits
 * 16-19 of the flags select the first-step table, bits 20-23 the second-step
 * table (MF_LEAPER_HAVE2) and bits 24-27 the mask table (MF_LEAPER_HAVEM).
 * The second step and the mask always use leapers[].
 */
bitboard_t get_aleaper_moves(move_flag_t flags, bitboard_t occ, int square, sides side)
{
   bitboard_t moves = 0;
   move_flag_t f = flags;
   int index;

   f >>= 16;

   /* First step */
   index = f & 0xf;
   if (side == WHITE) {
      moves |= leapers[index][square];
   } else {
      moves |= reverse_leapers[index][square];
   }

   /* Second step, for complicated leapers; only if the first step was free */
   f >>= 4;
   if (flags & MF_LEAPER_HAVE2) {
      bitboard_t bb = moves & ~occ;
      index = f & 0xf;
      /* Visit every free first-step square. The loop has to run while there
       * are squares left; scanning an empty bitboard is not meaningful.
       */
      while (bb) {
         int via = bitscan64(bb);
         bb ^= make_bitboard_square(via);
         moves |= leapers[index][via];
      }
   }

   /* Apply mask, looked up for the square the piece started from */
   f >>= 4;
   if (flags & MF_LEAPER_HAVEM) {
      index = f & 0xf;
      moves &= leapers[index][square];
   }

   return moves;
}


/* Large-board version of get_aleaper_moves(): target squares of an asymmetric
 * leaper on `square`.
 *
 * The first step comes from large_leapers[] for WHITE and from
 * large_reverse_leapers[] for any other side; the second step
 * (MF_LEAPER_HAVE2) and the mask (MF_LEAPER_HAVEM) always use
 * large_leapers[]. The three table indices sit in bits 16-19, 20-23 and 24-27
 * of the flags.
 */
large_bitboard_t get_large_aleaper_moves(move_flag_t flags, large_bitboard_t occ, int square, sides side)
{
   const int first_step  = (flags >> 16) & 0xf;
   const int second_step = (flags >> 20) & 0xf;
   const int mask_table  = (flags >> 24) & 0xf;
   large_bitboard_t targets = large_board_empty;

   /* First step */
   if (side == WHITE)
      targets |= large_leapers[first_step][square];
   else
      targets |= large_reverse_leapers[first_step][square];

   /* Second step, for complicated leapers; only if the first step was free */
   if (flags & MF_LEAPER_HAVE2) {
      large_bitboard_t pending = targets & ~occ;

      while (!is_zero128(pending)) {
         int via = bitscan128(pending);
         pending ^= large_square_bitboards[via];
         targets |= large_leapers[second_step][via];
      }
   }

   /* The mask is looked up for the square the piece started from */
   if (flags & MF_LEAPER_HAVEM)
      targets &= large_leapers[mask_table][square];

   return targets;
}

/* Squares that the given set of steppers can move to, for all of them in
 * parallel.
 *
 * For every direction with a non-zero repeat count the whole set is shifted
 * that many times; after each shift the pieces that landed on a square in
 * `occ` are dropped. Only the squares reached after the last step are
 * returned.
 */
bitboard_t get_stepper_move_bitboard(move_flag_t flags, bitboard_t occ, bitboard_t steppers)
{
   const int slot = (flags & MF_STEPPER) >> 8;
   bitboard_t result = board_empty;
   int dir;

   for (dir=0; dir<8; dir++) {
      int remaining = (stepper_description[slot] >> (dir*3)) & 7;
      bitboard_t front = steppers;

      if (remaining == 0) continue;

      /* Repeated steps, one after the other; this can effectively duplicate
       * a slider.
       */
      while (remaining > 0) {
         front &= step_mask[dir];
         front = sshift64(front, step_shift[dir]);
         front &= ~occ;
         remaining--;
      }
      result |= front;
   }

   return result;
}

/* Span of the given set of steppers: every square they could reach by
 * repeating their step over unoccupied squares.
 *
 * For each direction that has a non-zero repeat count the set is shifted
 * large_board_ranks times, regardless of the value of that count; pieces
 * landing on a square in `occ` are dropped and all squares reached on the way
 * are collected.
 */
bitboard_t get_stepper_move_span_bitboard(move_flag_t flags, bitboard_t occ, bitboard_t steppers)
{
   const int slot = (flags & MF_STEPPER) >> 8;
   bitboard_t result = board_empty;
   int dir;

   for (dir=0; dir<8; dir++) {
      const int repeat = (stepper_description[slot] >> (dir*3)) & 7;
      bitboard_t front = steppers;
      int step;

      /* Directions in which the piece does not step at all are skipped */
      if (repeat == 0) continue;

      for (step=0; step<large_board_ranks; step++) {
         front &= step_mask[dir];
         front = sshift64(front, step_shift[dir]);
         front &= ~occ;
         result |= front;
      }
   }

   return result;
}

/* Squares attacked by the given set of steppers, for all of them in parallel.
 *
 * For every direction with a non-zero repeat count the set is shifted that
 * many times. Each square reached counts as attacked, whether it is occupied
 * or not, but only pieces that landed on a square outside `occ` continue
 * with the next step.
 */
bitboard_t get_stepper_attack_bitboard(move_flag_t flags, bitboard_t occ, bitboard_t steppers)
{
   const int slot = (flags & MF_STEPPER) >> 8;
   bitboard_t result = board_empty;
   int dir;

   for (dir=0; dir<8; dir++) {
      int remaining = (stepper_description[slot] >> (dir*3)) & 7;
      bitboard_t front = steppers;

      if (remaining == 0) continue;

      /* Repeated steps, one after the other; this can effectively duplicate
       * a slider.
       */
      while (remaining > 0) {
         front &= step_mask[dir];
         front = sshift64(front, step_shift[dir]);
         result |= front;
         front &= ~occ;
         remaining--;
      }
   }

   return result;
}

/* Large-board version of get_stepper_move_bitboard(): squares that the given
 * set of steppers can move to.
 *
 * For every direction with a non-zero repeat count the whole set is shifted
 * that many times, dropping pieces that land on a square in `occ` after each
 * shift. Only the squares reached after the last step are returned.
 */
large_bitboard_t get_large_stepper_move_bitboard(move_flag_t flags, large_bitboard_t occ, large_bitboard_t steppers)
{
   const int slot = (flags & MF_STEPPER) >> 8;
   large_bitboard_t result = large_board_empty;
   int dir;

   for (dir=0; dir<8; dir++) {
      int remaining = (stepper_description[slot] >> (dir*3)) & 7;
      large_bitboard_t front = steppers;

      if (remaining == 0) continue;

      /* Repeated steps, one after the other; this can effectively duplicate
       * a slider.
       */
      while (remaining > 0) {
         front &= large_step_mask[dir];
         front = sshift128(front, large_step_shift[dir]);
         front &= ~occ;
         remaining--;
      }
      result |= front;
   }

   return result;
}

/* Large-board version of get_stepper_move_span_bitboard(): every square the
 * given steppers could reach by repeating their step over unoccupied squares.
 *
 * For each direction that has a non-zero repeat count the set is shifted
 * large_board_ranks times, regardless of the value of that count; pieces
 * landing on a square in `occ` are dropped and all squares reached on the way
 * are collected.
 */
large_bitboard_t get_large_stepper_move_span_bitboard(move_flag_t flags, large_bitboard_t occ, large_bitboard_t steppers)
{
   const int slot = (flags & MF_STEPPER) >> 8;
   large_bitboard_t result = large_board_empty;
   int dir;

   for (dir=0; dir<8; dir++) {
      const int repeat = (stepper_description[slot] >> (dir*3)) & 7;
      large_bitboard_t front = steppers;
      int step;

      /* Directions in which the piece does not step at all are skipped */
      if (repeat == 0) continue;

      for (step=0; step<large_board_ranks; step++) {
         front &= large_step_mask[dir];
         front = sshift128(front, large_step_shift[dir]);
         front &= ~occ;
         result |= front;
      }
   }

   return result;
}

/* Large-board version of get_stepper_attack_bitboard(): squares attacked by
 * the given set of steppers.
 *
 * For every direction with a non-zero repeat count the set is shifted that
 * many times. Each square reached counts as attacked, whether it is occupied
 * or not, but only pieces that landed on a square outside `occ` continue
 * with the next step.
 */
large_bitboard_t get_large_stepper_attack_bitboard(move_flag_t flags, large_bitboard_t occ, large_bitboard_t steppers)
{
   const int slot = (flags & MF_STEPPER) >> 8;
   large_bitboard_t result = large_board_empty;
   int dir;

   for (dir=0; dir<8; dir++) {
      int remaining = (stepper_description[slot] >> (dir*3)) & 7;
      large_bitboard_t front = steppers;

      if (remaining == 0) continue;

      /* Repeated steps, one after the other; this can effectively duplicate
       * a slider.
       */
      while (remaining > 0) {
         front &= large_step_mask[dir];
         front = sshift128(front, large_step_shift[dir]);
         result |= front;
         front &= ~occ;
         remaining--;
      }
   }

   return result;
}

/* Collect, in one bitboard, the empty squares that pieces of side_to_move can move to (64-square board).
 *
 * board                     position to examine
 * source_mask               only own pieces on these squares are considered
 * destination_mask          applied to the regular stepper, slider and leaper destinations
 * side_to_move              side whose moves are collected
 * allowed_promotion_pieces  not used in this function
 *
 * Occupied squares are removed from every contribution, so captures are not part of the result.
 * The locals piece_capture_flags, move and piece and the label `done` are not used in the body.
 */
static bitboard_t generate_move_bitboard_mask(const board_t *board, bitboard_t source_mask, bitboard_t destination_mask, sides side_to_move, uint16_t allowed_promotion_pieces)
{
   piece_description_t *piece_types;
   move_flag_t *piece_capture_flags;
   move_flag_t *piece_move_flags;
   move_flag_t *special_move_flags;
   bitboard_t *special_zone;
   bitboard_t own, enemy, own_movers;
   bitboard_t occupied;
   bitboard_t move_bitboard;
   move_t *move;
   int n;

   piece_types = board->piece_types;
   piece_capture_flags = piece_types->piece_capture_flags;
   piece_move_flags = piece_types->piece_move_flags;
   special_zone = piece_types->piece_special_zone;
   special_move_flags = piece_types->piece_special_move_flags;

   own = board->bbc[side_to_move];
   enemy = board->bbc[next_side[side_to_move]];
   occupied = or_bitboards(own, enemy);

   own_movers = own & source_mask;
   move_bitboard = 0;

   /* Now generate moves for all pieces; only scan our own pieces. This mainly helps variants with different
    * armies.
    */
   bitboard_t movers = own_movers;
   while (movers) {
      /* Take the piece type of the first remaining mover and strike all pieces of that type from the
       * work list, so that every piece type is handled exactly once.
       */
      int first_mover = bitscan64(movers);
      n = board->piece[first_mover];
      movers &= ~board->bbp[n];

      assert(board->bbp[n] & make_bitboard_square(first_mover));

      /* All own pieces of this type that lie within source_mask */
      bitboard_t bb = own_movers & board->bbp[n];
      int piece = piece_for_side(n, side_to_move);

      /* Check for stepper moves, which are generated in parallel */
      if (piece_move_flags[n] & MF_STEPPER) {
         int si = (piece_move_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int c = (stepper_description[si] >> (d*3)) & 7;
            bitboard_t moves = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             * Every intermediate square is added to the result, not only the last one.
             */
            for ( ; c>0; c--) {
               moves &= step_mask[d];
               moves = sshift64(moves, step_shift[d]);
               moves &= ~occupied;
               move_bitboard |= moves & destination_mask;
            }
         }
      }

      /* Double pawn steps (generalised) */
      if (special_zone[n] && special_move_flags[n] & MF_STEPPER) {
         int si = (special_move_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int c = (stepper_description[si] >> (d*3)) & 7;
            if (!c) continue;
            /* NOTE(review): the steps start from the whole special zone, not from the own pieces
             * inside it (bb & special_zone[n]) - confirm that this is intended.
             */
            bitboard_t moves = special_zone[n];
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             * Here only the squares reached after the last step are kept.
             */
            for ( ; c>0; c--) {
               moves &= step_mask[d];
               moves = sshift64(moves, step_shift[d]);
               moves &= ~occupied;
            }

            /* NOTE(review): unlike the other contributions this one is not restricted to
             * destination_mask - confirm that this is intended.
             */
            move_bitboard |= moves;
         }
      }

      /* Now determine slider and leaper moves for this piece type - if it has any */
      if (piece_move_flags[n] & (MF_SLIDER|MF_IS_LEAPER) ||
          special_move_flags[n] & (MF_SLIDER|MF_IS_LEAPER)) {
         /* These are generated piece by piece */
         while (bb) {
            bitboard_t moves = 0;
            int from = bitscan64(bb);
            move_flag_t move_flags = piece_move_flags[n];
            bb ^= make_bitboard_square(from);

            /* Special moves; mainly for pawn-type pieces.
             * A piece standing in its special zone uses the special move flags instead of the
             * regular ones.
             */
            if (special_zone[n]&make_bitboard_square(from))
               move_flags = special_move_flags[n];

            /* Determine the movement type for this piece */
            /* Get all regular moves
             * Test common combinations more efficiently.
             */
            switch (move_flags & (MF_SLIDER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
               case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                  moves |= get_aleaper_moves(move_flags, occupied, from, side_to_move);
                  break;
               case MF_IS_LEAPER:
                  moves |= get_leaper_moves(move_flags, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V):
                  moves |= get_rank_attacks(hmslider, occupied, from) |
                           get_file_attacks(hmslider, occupied, from);
                  break;
               case (MF_SLIDER_D | MF_SLIDER_A):
                  moves |= get_diagonal_attacks(hmslider, occupied, from) |
                           get_antidiagonal_attacks(hmslider, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                  moves |= get_rank_attacks(hmslider, occupied, from) |
                           get_file_attacks(hmslider, occupied, from) |
                           get_diagonal_attacks(hmslider, occupied, from) |
                           get_antidiagonal_attacks(hmslider, occupied, from);
                  break;
               default:
                  /* Any other combination: add the components one at a time */
                  if (move_flags & MF_SLIDER_H) moves |= get_rank_attacks(hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_V) moves |= get_file_attacks(hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_D) moves |= get_diagonal_attacks(hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_A) moves |= get_antidiagonal_attacks(hmslider, occupied, from);
                  if (move_flags & MF_IS_LEAPER) moves |= get_leaper_moves(move_flags, occupied, from);
            }


            /* Keep only empty squares inside destination_mask; squares holding a piece of either
             * side are removed.
             */
            moves &= ~occupied;
            moves &= destination_mask;

            move_bitboard |= moves;
         }
      }
   }

done:

   return move_bitboard;
}

/* Collect, in one bitboard, the squares attacked by pieces of side_to_move (64-square board).
 *
 * board                     position to examine
 * source_mask               only own pieces on these squares are considered
 * destination_mask          the result is restricted to these squares
 * side_to_move              side whose attacks are collected
 * allowed_promotion_pieces  not used in this function
 *
 * Attacks are derived from the capture flags of each piece type. Attacked squares are reported
 * whether they are empty or occupied (by either side); occupied squares only block sliders and
 * repeated steps.
 */
static bitboard_t generate_attack_bitboard_mask(const board_t *board, bitboard_t source_mask, bitboard_t destination_mask, sides side_to_move, uint16_t allowed_promotion_pieces)
{
   piece_description_t *piece_types;
   move_flag_t *piece_capture_flags;
   bitboard_t own, enemy, own_movers;
   bitboard_t occupied;
   bitboard_t attacked;
   int n;

   piece_types = board->piece_types;
   piece_capture_flags = piece_types->piece_capture_flags;

   own = board->bbc[side_to_move];
   enemy = board->bbc[next_side[side_to_move]];

   /* FIXME: doesn't take into account more than two sides yet... */
   occupied = or_bitboards(own, enemy);

   own_movers = own & source_mask;
   attacked = 0;

   /* Now generate moves for all pieces; only scan our own pieces. This mainly helps variants with different
    * armies.
    */
   bitboard_t movers = own_movers;
   while (movers) {
      /* Take the piece type of the first remaining mover and strike all pieces of that type from the
       * work list, so that every piece type is handled exactly once.
       */
      int first_mover = bitscan64(movers);
      n = board->piece[first_mover];
      movers &= ~board->bbp[n];

      assert(board->bbp[n] & make_bitboard_square(first_mover));

      /* All own pieces of this type that lie within source_mask, and the way they capture */
      bitboard_t bb = own_movers & board->bbp[n];
      const move_flag_t capture_flags = piece_capture_flags[n];

      /* Check for stepper moves, which are generated in parallel */
      if (bb && capture_flags & MF_STEPPER) {
         int si = (capture_flags & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int c = (stepper_description[si] >> (d*3)) & 7;
            bitboard_t captures = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             */
            for ( ; c>0; c--) {
               captures &= step_mask[d];
               captures = sshift64(captures, step_shift[d]);

               /* The square is attacked even if it is occupied, but we cannot step beyond it */
               attacked |= captures;
               captures &= ~occupied;
            }
         }
      }

      /* Now determine slider and leaper moves for this piece type - if it has any */
      if (capture_flags & (MF_SLIDER|MF_IS_LEAPER|MF_LEAPER_ASYMM)) {
         while (bb) {
            bitboard_t captures = 0;
            int from = bitscan64(bb);
            bb ^= make_bitboard_square(from);

            /* Get all regular attacks (including those on empty squares)
             * Test common combinations more efficiently.
             * The leaper tables are selected by the capture flags as well: the indices stored in
             * the move flags describe how the piece moves, which need not be how it captures.
             */
            switch (capture_flags & (MF_SLIDER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
               case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                  captures |= get_aleaper_moves(capture_flags, occupied, from, side_to_move);
                  break;
               case MF_IS_LEAPER:
                  captures |= get_leaper_moves(capture_flags, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V):
                  captures |= get_rank_attacks(hmslider, occupied, from) |
                              get_file_attacks(hmslider, occupied, from);
                  break;
               case (MF_SLIDER_D | MF_SLIDER_A):
                  captures |= get_diagonal_attacks(hmslider, occupied, from) |
                              get_antidiagonal_attacks(hmslider, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                  captures |= get_rank_attacks(hmslider, occupied, from) |
                              get_file_attacks(hmslider, occupied, from) |
                              get_diagonal_attacks(hmslider, occupied, from) |
                              get_antidiagonal_attacks(hmslider, occupied, from);
                  break;
               default:
                  /* Any other combination: add the components one at a time */
                  if (capture_flags & MF_SLIDER_H) captures |= get_rank_attacks(hmslider, occupied, from);
                  if (capture_flags & MF_SLIDER_V) captures |= get_file_attacks(hmslider, occupied, from);
                  if (capture_flags & MF_SLIDER_D) captures |= get_diagonal_attacks(hmslider, occupied, from);
                  if (capture_flags & MF_SLIDER_A) captures |= get_antidiagonal_attacks(hmslider, occupied, from);

                  if (capture_flags & MF_IS_LEAPER) captures |= get_leaper_moves(capture_flags, occupied, from);
                  break;
            }

            /* Mark attacked squares */
            attacked |= captures;
         }
      }
   }

   return attacked & destination_mask;
}



/* Compute the squares attacked by the pieces of `side_to_move` on a large board.
 *
 * board:                     position to examine.
 * source_mask:               only pieces of `side_to_move` standing on these squares are considered.
 * destination_mask:          the result is restricted to these squares.
 * side_to_move:              the side whose attacks are generated.
 * allowed_promotion_pieces:  not used by this function.
 *
 * Attacks are derived from each piece type's capture flags. For steppers, every square reached is marked as
 * attacked, including the first occupied square in a direction (whichever side occupies it); stepping stops
 * there.
 *
 * Returns the bitboard of attacked squares, masked by `destination_mask`.
 */
static large_bitboard_t generate_large_attack_bitboard_mask(const board_t *board, large_bitboard_t source_mask, large_bitboard_t destination_mask, sides side_to_move, uint16_t allowed_promotion_pieces)
{
   piece_description_t *piece_types;
   move_flag_t *piece_capture_flags;
   large_bitboard_t own, enemy, own_movers;
   large_bitboard_t occupied;
   large_bitboard_t attacked;
   large_bitboard_t movers;
   int n;

   (void)allowed_promotion_pieces;

   piece_types = board->piece_types;
   piece_capture_flags = piece_types->piece_capture_flags;

   own = board->large_bbc[side_to_move];
   enemy = board->large_bbc[next_side[side_to_move]];

   occupied = own | enemy;

   own_movers = own & source_mask;
   attacked = large_board_empty;

   /* Only scan our own pieces, one piece type at a time: all pieces of the type found on the first set
    * square are removed from `movers` and handled together. This mainly helps variants with different
    * armies.
    */
   movers = own_movers;
   while (!is_zero128(movers)) {
      int first_mover = bitscan128(movers);
      n = board->piece[first_mover];
      movers &= ~board->large_bbp[n];

      assert(!is_zero128(board->large_bbp[n] & large_square_bitboards[first_mover]));

      move_flag_t capture_flags = piece_capture_flags[n];
      large_bitboard_t bb = own_movers & board->large_bbp[n];

      /* Stepper attacks are generated in parallel for all pieces of this type */
      if (!is_zero128(bb) && (capture_flags & MF_STEPPER)) {
         int si = (capture_flags & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            /* Repeat count for direction d: 3 bits per direction */
            int c = (stepper_description[si] >> (d*3)) & 7;
            large_bitboard_t captures = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             */
            for ( ; c>0; c--) {
               captures &= large_step_mask[d];
               captures = sshift128(captures, large_step_shift[d]);

               /* The square reached is attacked even if it is occupied, but we cannot step beyond it */
               attacked |= captures;
               captures &= ~occupied;
            }
         }
      }

      /* Now determine slider, hopper and leaper attacks for this piece type - if it has any */
      if (capture_flags & (MF_SLIDER|MF_HOPPER|MF_IS_LEAPER|MF_LEAPER_ASYMM)) {
         while (!is_zero128(bb)) {
            large_bitboard_t captures = large_board_empty;
            int from = bitscan128(bb);
            bb ^= large_square_bitboards[from];

            /* Get all regular attacks (including those on empty squares)
             * For sliders, the attacked piece is included in the move table, so we don't need to explicitly
             * generate the capture.
             * Test common combinations more efficiently.
             * The leaper helpers are passed the capture flags (the same flags the switch tests), so that a
             * piece that captures differently from how it moves uses its capture leaper.
             * FIXME: does not handle hoppers properly (?).
             */
            switch (capture_flags & (MF_SLIDER | MF_HOPPER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
               case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                  captures |= get_large_aleaper_moves(capture_flags, occupied, from, side_to_move);
                  break;
               case MF_IS_LEAPER:
                  captures |= get_large_leaper_moves(capture_flags, occupied, from);
                  break;
               case (MF_HOPPER_H | MF_HOPPER_V):
                  captures |= get_large_rank_attacks(large_hmhopper, occupied, from) |
                              get_large_file_attacks(large_vmhopper, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V):
                  captures |= get_large_rank_attacks(large_hmslider, occupied, from) |
                              get_large_file_attacks(large_vmslider, occupied, from);
                  break;
               case (MF_SLIDER_D | MF_SLIDER_A):
                  captures |= get_large_diagonal_attacks(large_hmslider, occupied, from) |
                              get_large_antidiagonal_attacks(large_hmslider, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                  captures |= get_large_rank_attacks(large_hmslider, occupied, from) |
                              get_large_file_attacks(large_vmslider, occupied, from) |
                              get_large_diagonal_attacks(large_hmslider, occupied, from) |
                              get_large_antidiagonal_attacks(large_hmslider, occupied, from);
                  break;
               default:
                  /* Uncommon combination: collect each component separately */
                  if (capture_flags & MF_HOPPER_H) captures |= get_large_rank_attacks(large_hmhopper, occupied, from);
                  if (capture_flags & MF_HOPPER_V) captures |= get_large_file_attacks(large_vmhopper, occupied, from);
                  if (capture_flags & MF_HOPPER_D) captures |= get_large_diagonal_attacks(large_hmhopper, occupied, from);
                  if (capture_flags & MF_HOPPER_A) captures |= get_large_antidiagonal_attacks(large_hmhopper, occupied, from);

                  if (capture_flags & MF_SLIDER_H) captures |= get_large_rank_attacks(large_hmslider, occupied, from);
                  if (capture_flags & MF_SLIDER_V) captures |= get_large_file_attacks(large_vmslider, occupied, from);
                  if (capture_flags & MF_SLIDER_D) captures |= get_large_diagonal_attacks(large_hmslider, occupied, from);
                  if (capture_flags & MF_SLIDER_A) captures |= get_large_antidiagonal_attacks(large_hmslider, occupied, from);

                  if (capture_flags & MF_IS_LEAPER) captures |= get_large_leaper_moves(capture_flags, occupied, from);
            }

            /* Mark attacked squares */
            attacked |= captures;
         }
      }
   }

   return attacked & destination_mask;
}



/* Generate moves on an 8x8 board.
 *
 * movelist:                  moves are appended starting at index movelist->num_moves, which is updated
 *                            before returning.
 * board:                     position to generate moves for.
 * source_mask:               only pieces of `side_to_move` standing on these squares are moved.
 * destination_mask:          only moves (and drops) that end on these squares are generated.
 * side_to_move:              the side for which moves are generated.
 * allowed_promotion_pieces:  bitmask of piece types; intersected with each piece type's promotion choices.
 *
 * Generated in order: drops (if the board uses holdings), and then for each piece type present: stepper
 * moves, special multi-step stepper moves from the special zone, stepper captures (including en-passant),
 * castling, and slider/hopper/leaper moves and captures.
 * Apart from the attack test on the castling squares, this function does not test whether a move leaves the
 * mover's own royal piece attacked.
 */
static void generate_moves_mask(movelist_t *movelist, const board_t *board, bitboard_t source_mask, bitboard_t destination_mask, sides side_to_move, uint16_t allowed_promotion_pieces)
{
   piece_description_t *piece_types;
   move_flag_t *piece_capture_flags;
   move_flag_t *piece_move_flags;
   move_flag_t *special_move_flags;
   bitboard_t *special_zone;
   bitboard_t own, enemy, own_movers;
   bitboard_t occupied;
   move_t *move;
   int n;

   piece_types = board->piece_types;
   piece_capture_flags = piece_types->piece_capture_flags;
   piece_move_flags = piece_types->piece_move_flags;
   special_zone = piece_types->piece_special_zone;
   special_move_flags = piece_types->piece_special_move_flags;

   /* Bookkeeping: we keep a pointer to the next move in the move list, and
    * update the number of moves in the list at the end of this function
    */
   move = &(movelist->move[movelist->num_moves]);

   own = board->bbc[side_to_move];
   enemy = board->bbc[next_side[side_to_move]];

   /* FIXME: doesn't take into account more than two sides yet... */
   occupied = or_bitboards(own, enemy);

   own_movers = own & source_mask;

   /* Generate drops: any piece type in hand may be dropped on an empty square that lies in both the side's
    * drop zone and the piece type's drop zone.
    */
   if (board->use_holdings) {
      bool dropped = false;
      for (n=0; n<piece_types->num_piece_types; n++) {
         if (board->holdings[n][side_to_move]) {
            /* Note: set as soon as a piece is in hand, even if no drop square is available for it */
            dropped = true;

            int piece = piece_for_side(n, side_to_move);
            bitboard_t drop_squares = destination_mask & ~occupied & board->drop_zone[side_to_move] & piece_types->piece_drop_zone[n];
            while (drop_squares) {
               int to = bitscan64(drop_squares);
               drop_squares ^= make_bitboard_square(to);
               *move = encode_drop_move(piece, to);
               *move = add_move_retrieve(*move, piece, 1);
               move++;
            }
         }
      }

      /* Break out early if drops are possible and are forced if possible; no other moves are legal. */
      if (dropped && (board->rule_flags & RF_FORCE_DROPS))
         goto done;
   }

   /* Now generate moves for all pieces; only scan our own pieces. This mainly helps variants with different
    * armies.
    * Pieces are handled one type at a time: all pieces of the type found on the first set square are removed
    * from `movers` and processed together.
    */
   bitboard_t movers = own_movers;
   while (movers) {
      int first_mover = bitscan64(movers);
      n = board->piece[first_mover];
      movers &= ~board->bbp[n];

      assert(board->bbp[n] & make_bitboard_square(first_mover));

      /* Target square for en-passant captures, if this piece type may capture en-passant.
       * An ep square of 0 is treated as "no en-passant square".
       */
      bitboard_t ep_capture = board_empty;

      if ((piece_types->piece_flags[n] & PF_TAKE_EP) && board->ep)
         ep_capture = make_bitboard_square(board->ep);

      bitboard_t bb = own_movers & board->bbp[n];
      int piece = piece_for_side(n, side_to_move);

      /* Check for stepper moves, which are generated in parallel */
      if (bb && (piece_move_flags[n] & MF_STEPPER)) {
         int si = (piece_move_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            /* Repeat count for direction d: 3 bits per direction */
            int count = (stepper_description[si] >> (d*3)) & 7;
            int steps;
            bitboard_t moves = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             * `steps` is the number of steps taken so far, which is what is needed to reconstruct the
             * origin square from the destination square.
             */
            for (steps=1; steps<=count; steps++) {
               moves &= step_mask[d];
               moves = sshift64(moves, step_shift[d]);
               moves &= ~occupied;

               /* Scan all bits */
               bitboard_t targets = moves & destination_mask;
               while (targets) {
                  int to = bitscan64(targets);
                  int from = to - steps*step_shift[d];
                  targets ^= make_bitboard_square(to);

                  assert(board->bbp[n] & board->bbc[side_to_move] & make_bitboard_square(from));

                  /* Check for promotions
                   * When the piece moves into the promotion zone, it will get promoted to one of the allowed
                   * promotion pieces, which can be different for each piece type (and further restricted, for
                   * instance during Q-search).
                   * Promotion to a piece type is only allowed if the number of pieces of that type a player
                   * has is smaller than the maximum for that type (a maximum of 128 means no limit).
                   */
                  if (piece_types->piece_promotion_zone[n] & make_bitboard_square(to)) {
                     uint16_t choices = piece_types->piece_promotion_choice[n] & allowed_promotion_pieces;
                     while (choices) {
                        int tpiece = bitscan16(choices);
                        choices ^= 1<<tpiece;
                        if (piece_types->piece_maximum[tpiece][side_to_move] == 128 ||
                            popcount64(board->bbp[tpiece] & own) < piece_types->piece_maximum[tpiece][side_to_move]) {
                           tpiece = piece_for_side(tpiece, side_to_move);
                           *move = encode_normal_promotion(piece, from, to, tpiece); move++;
                        }
                     }
                     /* If promotions are optional, we also encode a normal move */
                     if (piece_types->piece_optional_promotion_zone[n] & make_bitboard_square(to)) {
                        *move = encode_normal_move(piece, from, to);
                        if (piece_types->piece_flags[n] & PF_NORET)
                           *move |= MOVE_RESET50;
                        move++;
                     }
                  } else {
                     *move = encode_normal_move(piece, from, to);
                     if (piece_types->piece_flags[n] & PF_NORET)
                        *move |= MOVE_RESET50;
                     move++;
                  }
               }
            }
         }
      }

      /* Double pawn steps (generalised): pieces in their special zone may take the full number of steps
       * given by the special stepper description in one move; all squares passed over must be empty.
       */
      if ((bb & special_zone[n]) && (special_move_flags[n] & MF_STEPPER)) {
         int si = (special_move_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int count = (stepper_description[si] >> (d*3)) & 7;
            int steps;
            if (!count) continue;
            bitboard_t moves = bb & special_zone[n];
            /* We have a repetition count, so we do a number of steps one after the other.
             * Only the final square is a valid destination.
             */
            for (steps=0; steps<count; steps++) {
               moves &= step_mask[d];
               moves = sshift64(moves, step_shift[d]);
               moves &= ~occupied;
            }
            /* Scan all bits */
            bitboard_t targets = moves & destination_mask;
            while (targets) {
               int to = bitscan64(targets);
               /* The piece has taken `count` steps (2 for the normal double pawn step) */
               int from = to - count*step_shift[d];
               targets ^= make_bitboard_square(to);

               *move = encode_normal_move(piece, from, to);
               if (piece_types->piece_flags[n] & PF_SET_EP)
                  *move |= MOVE_SET_ENPASSANT;
               if (piece_types->piece_flags[n] & PF_NORET)
                  *move |= MOVE_RESET50;
               move++;
            }
         }
      }

      /* Ditto captures */
      if (bb && (piece_capture_flags[n] & MF_STEPPER)) {
         int si = (piece_capture_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int count = (stepper_description[si] >> (d*3)) & 7;
            int steps;
            bitboard_t captures = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             * `steps` is the number of steps taken so far.
             */
            for (steps=1; steps<=count; steps++) {
               captures &= step_mask[d];
               captures = sshift64(captures, step_shift[d]);

               /* Scan all bits */
               bitboard_t targets = captures & (enemy | ep_capture) & destination_mask;
               while (targets) {
                  int to = bitscan64(targets);
                  int from = to - steps*step_shift[d];
                  int ptaken = piece_for_side(get_piece(board, to), next_side[side_to_move]);
                  targets ^= make_bitboard_square(to);

                  /* Check for promotions */
                  if (piece_types->piece_promotion_zone[n] & make_bitboard_square(to)) {
                     uint16_t choices = piece_types->piece_promotion_choice[n] & allowed_promotion_pieces;
                     while (choices) {
                        int tpiece = bitscan16(choices);
                        choices ^= 1<<tpiece;
                        if (piece_types->piece_maximum[tpiece][side_to_move] == 128 ||
                            popcount64(board->bbp[tpiece] & own) < piece_types->piece_maximum[tpiece][side_to_move]) {
                           tpiece = piece_for_side(tpiece, side_to_move);
                           *move = encode_capture_promotion(piece, from, to, ptaken, tpiece); move++;
                        }
                     }
                     /* If promotions are optional, we also encode a normal capture */
                     if (piece_types->piece_optional_promotion_zone[n] & make_bitboard_square(to)) {
                        if (ep_capture & make_bitboard_square(to)) {
                           /* En-passant: the captured piece is on board->ep_capture, not on `to` */
                           int ep_taken = piece_for_side(get_piece(board, board->ep_capture), next_side[side_to_move]);
                           *move = encode_en_passant_capture(piece, from, to, ep_taken, board->ep_capture);
                        } else {
                           *move = encode_normal_capture(piece, from, to, ptaken);
                        }
                        move++;
                     }
                  } else {
                     if (ep_capture & make_bitboard_square(to)) {
                        /* En-passant: the captured piece is on board->ep_capture, not on `to` */
                        int ep_taken = piece_for_side(get_piece(board, board->ep_capture), next_side[side_to_move]);
                        *move = encode_en_passant_capture(piece, from, to, ep_taken, board->ep_capture);
                     } else {
                        *move = encode_normal_capture(piece, from, to, ptaken);
                     }
                     move++;
                  }
               }
               /* We cannot step beyond an occupied square */
               captures &= ~occupied;
            }
         }
      }

      /* Castling
       * Because of the hassle when doing legality testing, we explicitly test whether castling is allowed in
       * the current position by testing for attacks on any of the critical squares. This is a hassle and
       * potentially slow, but only if castling may be possible in the current position.
       */
      if (expect(piece_types->piece_flags[n] & PF_CASTLE, false) && (board->init&bb)) {
         /* Both castling pieces must still be on their initial squares */
         if ((board->init & short_castle_mask[side_to_move]) == short_castle_mask[side_to_move]) {
            if (!short_castle_free[side_to_move] ||
                ((occupied & short_castle_free[side_to_move]) == board_empty && (destination_mask & short_castle_free[side_to_move]))) {
               bitboard_t test             = short_castle_safe[side_to_move];
               bitboard_t mask             = get_super_attacks_for_squares(test);
               bitboard_t attacked_squares = generate_attack_bitboard(board, test, mask, next_side[side_to_move]);
               if ((attacked_squares & short_castle_safe[side_to_move]) == board_empty) {
                  /* from1 is the castling piece of this type, from2 its partner */
                  int from1 = bitscan64(short_castle_mask[side_to_move] & bb);
                  int from2 = bitscan64(short_castle_mask[side_to_move] & ~bb);
                  int piece2 = piece_for_side(get_piece(board, from2), side_to_move);
                  int to1 = short_castle_king_dest[side_to_move];
                  *move = encode_castle_move(piece, from1, to1, piece2, from2, to1-1); move++;
               }
            }
         }

         if ((board->init & long_castle_mask[side_to_move]) == long_castle_mask[side_to_move]) {
            if (!long_castle_free[side_to_move] ||
                ((occupied & long_castle_free[side_to_move]) == board_empty && (destination_mask & long_castle_free[side_to_move]))) {
               bitboard_t test             = long_castle_safe[side_to_move];
               bitboard_t mask             = get_super_attacks_for_squares(test);
               bitboard_t attacked_squares = generate_attack_bitboard(board, test, mask, next_side[side_to_move]);
               if ((attacked_squares & long_castle_safe[side_to_move]) == board_empty) {
                  int from1 = bitscan64(long_castle_mask[side_to_move] & bb);
                  int from2 = bitscan64(long_castle_mask[side_to_move] & ~bb);
                  int piece2 = piece_for_side(get_piece(board, from2), side_to_move);
                  int to1 = long_castle_king_dest[side_to_move];
                  *move = encode_castle_move(piece, from1, to1, piece2, from2, to1+1); move++;
               }
            }
         }
      }

      /* Now determine slider and leaper moves for this piece type - if it has any */
      if ( (piece_move_flags[n] | piece_capture_flags[n] | special_move_flags[n]) & (MF_HOPPER|MF_SLIDER|MF_IS_LEAPER)) {
         while (bb) {
            bitboard_t moves = 0;
            bitboard_t captures = 0;
            int from = bitscan64(bb);
            move_flag_t move_flags = piece_move_flags[n];
            move_flag_t capture_flags = piece_capture_flags[n];
            bb ^= make_bitboard_square(from);

            /* Special moves; mainly for pawn-type pieces */
            if (special_zone[n]&make_bitboard_square(from))
               move_flags = special_move_flags[n];

            /* Determine the movement type for this piece */

            /* Get all regular moves
             * Test common combinations more efficiently.
             */
            switch (move_flags & (MF_SLIDER | MF_HOPPER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
               case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                  moves |= get_aleaper_moves(move_flags, occupied, from, side_to_move);
                  break;
               case MF_IS_LEAPER:
                  moves |= get_leaper_moves(move_flags, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V):
                  moves |= get_rank_attacks(hmslider, occupied, from) |
                           get_file_attacks(hmslider, occupied, from);
                  break;
               case (MF_SLIDER_D | MF_SLIDER_A):
                  moves |= get_diagonal_attacks(hmslider, occupied, from) |
                           get_antidiagonal_attacks(hmslider, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                  moves |= get_rank_attacks(hmslider, occupied, from) |
                           get_file_attacks(hmslider, occupied, from) |
                           get_diagonal_attacks(hmslider, occupied, from) |
                           get_antidiagonal_attacks(hmslider, occupied, from);
                  break;
               default:
                  /* Uncommon combination: collect each component separately */
                  if (move_flags & MF_SLIDER_H) moves |= get_rank_attacks(hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_V) moves |= get_file_attacks(hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_D) moves |= get_diagonal_attacks(hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_A) moves |= get_antidiagonal_attacks(hmslider, occupied, from);
                  if (move_flags & MF_HOPPER_H) moves |= get_rank_attacks(hmhopper, occupied, from);
                  if (move_flags & MF_HOPPER_V) moves |= get_file_attacks(hmhopper, occupied, from);
                  if (move_flags & MF_HOPPER_D) moves |= get_diagonal_attacks(hmhopper, occupied, from);
                  if (move_flags & MF_HOPPER_A) moves |= get_antidiagonal_attacks(hmhopper, occupied, from);
                  if (move_flags & MF_IS_LEAPER) moves |= get_leaper_moves(move_flags, occupied, from);
            }

            /* Get all regular captures
             * If the piece captures the same way it moves, we can reuse the move bitboard. Otherwise, test
             * common combinations more efficiently.
             * The leaper helpers are passed the capture flags (the same flags the switch tests), so that a
             * piece that captures differently from how it moves uses its capture leaper.
             */
            if ( (move_flags & (MF_HOPPER | MF_SLIDER | MF_IS_LEAPER)) == (capture_flags & (MF_HOPPER | MF_SLIDER | MF_IS_LEAPER)) ) {
               captures = moves;
            } else {
               switch (capture_flags & (MF_SLIDER | MF_HOPPER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
                  case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                     captures |= get_aleaper_moves(capture_flags, occupied, from, side_to_move);
                     break;
                  case MF_IS_LEAPER:
                     captures |= get_leaper_moves(capture_flags, occupied, from);
                     break;
                  case (MF_SLIDER_H | MF_SLIDER_V):
                     captures |= get_rank_attacks(hcslider, occupied, from) |
                                 get_file_attacks(hcslider, occupied, from);
                     break;
                  case (MF_SLIDER_D | MF_SLIDER_A):
                     captures |= get_diagonal_attacks(hcslider, occupied, from) |
                                 get_antidiagonal_attacks(hcslider, occupied, from);
                     break;
                  case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                     captures |= get_rank_attacks(hcslider, occupied, from) |
                                 get_file_attacks(hcslider, occupied, from) |
                                 get_diagonal_attacks(hcslider, occupied, from) |
                                 get_antidiagonal_attacks(hcslider, occupied, from);
                     break;
                  default:
                     /* Uncommon combination: collect each component separately */
                     if (capture_flags & MF_SLIDER_H) captures |= get_rank_attacks(hcslider, occupied, from);
                     if (capture_flags & MF_SLIDER_V) captures |= get_file_attacks(hcslider, occupied, from);
                     if (capture_flags & MF_SLIDER_D) captures |= get_diagonal_attacks(hcslider, occupied, from);
                     if (capture_flags & MF_SLIDER_A) captures |= get_antidiagonal_attacks(hcslider, occupied, from);
                     if (capture_flags & MF_HOPPER_H) captures |= get_rank_attacks(hchopper, occupied, from);
                     if (capture_flags & MF_HOPPER_V) captures |= get_file_attacks(hchopper, occupied, from);
                     if (capture_flags & MF_HOPPER_D) captures |= get_diagonal_attacks(hchopper, occupied, from);
                     if (capture_flags & MF_HOPPER_A) captures |= get_antidiagonal_attacks(hchopper, occupied, from);
                     if (capture_flags & MF_IS_LEAPER) captures |= get_leaper_moves(capture_flags, occupied, from);
               }
            }

            /* Mask out occupied squares from normal moves, only capture enemy pieces */
            moves &= ~occupied;
            captures &= enemy;

            moves &= destination_mask;
            captures &= destination_mask;

            /* Loop over all bits to read off all destination squares */
            while (moves) {
               int to = bitscan64(moves);
               moves ^= make_bitboard_square(to);
               *move = encode_normal_move(piece, from, to); move++;
            }
            while (captures) {
               int to = bitscan64(captures);
               int ptaken = piece_for_side(get_piece(board, to), next_side[side_to_move]);
               captures ^= make_bitboard_square(to);
               *move = encode_normal_capture(piece, from, to, ptaken); move++;
            }
         }
      }
   }

done:

   /* Store the number of moves currently in the list */
   movelist->num_moves = (move - &(movelist->move[0]));
}



/* Generate moves on a large board */
static void generate_large_moves_mask(movelist_t *movelist, const board_t *board, large_bitboard_t source_mask, large_bitboard_t destination_mask, sides side_to_move, uint16_t allowed_promotion_pieces)
{
   piece_description_t *piece_types;
   move_flag_t *piece_capture_flags;
   move_flag_t *piece_move_flags;
   move_flag_t *special_move_flags;
   large_bitboard_t *special_zone;
   large_bitboard_t own, enemy, own_movers;
   large_bitboard_t occupied;
   large_bitboard_t attacked;
   move_t *move;
   int n;

   piece_types = board->piece_types;
   piece_capture_flags = piece_types->piece_capture_flags;
   piece_move_flags = piece_types->piece_move_flags;
   special_zone = piece_types->piece_large_special_zone;
   special_move_flags = piece_types->piece_special_move_flags;

   /* Bookkeeping: we keep a pointer to the next move in the move list, and
    * update the number of moves in the list at the end of this function
    */
   move = &(movelist->move[movelist->num_moves]);

   own = board->large_bbc[side_to_move];
   enemy = board->large_bbc[next_side[side_to_move]];

   /* FIXME: doesn't take into account more than two sides yet... */
   occupied = own | enemy;

   own_movers = own & source_mask;

   /* Generate drops */
   if (expect(board->use_holdings, false)) {
      bool dropped = false;
      for (n=0; n<piece_types->num_piece_types; n++) {
         if (board->holdings[n][side_to_move]) {
            dropped = true;

            int piece = piece_for_side(n, side_to_move);
            large_bitboard_t bb = destination_mask & ~own & board->large_drop_zone[side_to_move] & piece_types->piece_large_drop_zone[n];
            while (!is_zero128(bb)) {
               int to = bitscan128(bb);
               bb ^= large_square_bitboards[to];
               *move = encode_drop_move(piece, to);
               *move = add_move_retrieve(*move, piece, 1);
               move++;
            }
         }
      }

      /* Break out early if drops are possible and are forced if possible; no other moves are legal. */
      if (dropped && (board->rule_flags & RF_FORCE_DROPS))
         goto done;
   }

   /* Now generate moves for all pieces; only scan our own pieces. This mainly helps variants with different
    * armies.
    */
   large_bitboard_t movers = own_movers;
   while (!is_zero128(movers)) {
      int first_mover = bitscan128(movers);
      n = board->piece[first_mover];
      movers &= ~board->large_bbp[n];

      assert(!is_zero128(board->large_bbp[n] & large_square_bitboards[first_mover]));

      large_bitboard_t ep_capture = large_board_empty;

      if (piece_types->piece_flags[n] & PF_TAKE_EP && board->ep)
         ep_capture = large_square_bitboards[board->ep];

      large_bitboard_t bb = own_movers & board->large_bbp[n];
      int piece = piece_for_side(n, side_to_move);

      /* Check for stepper moves, which are generated in parallel */
      if (!is_zero128(bb) && piece_move_flags[n] & MF_STEPPER) {
         int si = (piece_move_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int c = (stepper_description[si] >> (d*3)) & 7;
            large_bitboard_t moves = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             */
            for ( ; c>0; c--) {
               moves &= large_step_mask[d];
               moves = sshift128(moves, large_step_shift[d]);
               moves &= ~occupied;

               /* Scan all bits */
               large_bitboard_t bb = moves & destination_mask;
               while (!is_zero128(bb)) {
                  int to = bitscan128(bb);
                  int from = to - c*large_step_shift[d];
                  bb ^= large_square_bitboards[to];

                  assert(!is_zero128(board->large_bbp[n] & board->large_bbc[side_to_move] & large_square_bitboards[from]));

                  /* Check for promotions
                   * When the piece moves into the promotion zone, it will get promoted to one of the allowed
                   * promotion pieces, which can be different for each piece type (and further restricted, for
                   * instance during Q-search).
                   * Promotion to a royal piece is only allowed if the number of royal pieces a player has is
                   * smaller than the maximum number of royal pieces.
                   */
                  if (!is_zero128(piece_types->piece_large_promotion_zone[n] & large_square_bitboards[to])) {
                     uint16_t c = piece_types->piece_promotion_choice[n] & allowed_promotion_pieces;
                     while (c) {
                        int tpiece = bitscan16(c);
                        c ^= 1<<tpiece;
                        if (piece_types->piece_maximum[tpiece][side_to_move] == 128 ||
                            popcount128(board->large_bbp[tpiece] & own) < piece_types->piece_maximum[tpiece][side_to_move]) {
                           tpiece = piece_for_side(tpiece, side_to_move);
                           *move = encode_normal_promotion(piece, from, to, tpiece); move++;
                        }
                     }
                     /* If promotions are optional, we also encode a normal move */
                     if (!is_zero128(piece_types->piece_large_optional_promotion_zone[n]&large_square_bitboards[to])) {
                        *move = encode_normal_move(piece, from, to);
                        if (piece_types->piece_flags[n] & PF_NORET)
                           *move |= MOVE_RESET50;
                        move++;
                     }
                  } else {
                     *move = encode_normal_move(piece, from, to);
                     if (piece_types->piece_flags[n] & PF_NORET)
                        *move |= MOVE_RESET50;
                     move++;
                  }
               }
            }
         }
      }

      /* Double pawn steps (generalised) */
      if (!is_zero128(bb & special_zone[n]) && special_move_flags[n] & MF_STEPPER) {
         int si = (special_move_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int c = (stepper_description[si] >> (d*3)) & 7;
            if (!c) continue;
            large_bitboard_t moves = bb&special_zone[n];
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             */
            for ( ; c>0; c--) {
               moves &= large_step_mask[d];
               moves = sshift128(moves, large_step_shift[d]);
               moves &= ~occupied;
            }
            /* Scan all bits */
            large_bitboard_t m = moves & destination_mask;
            while (!is_zero128(m)) {
               int to = bitscan128(m);
               int from = to - 2*large_step_shift[d];
               m ^= large_square_bitboards[to];

               *move = encode_normal_move(piece, from, to);
               if (piece_types->piece_flags[n] & PF_SET_EP)
                  *move |= MOVE_SET_ENPASSANT;
               if (piece_types->piece_flags[n] & PF_NORET)
                  *move |= MOVE_RESET50;
               move++;
            }
            moves ^= m;
         }
      }

      /* Ditto captures */
      if (!is_zero128(bb) && piece_capture_flags[n] & MF_STEPPER) {
         int si = (piece_capture_flags[n] & MF_STEPPER) >> 8;
         int d;
         for (d=0; d<8; d++) {
            int c = (stepper_description[si] >> (d*3)) & 7;
            large_bitboard_t captures = bb;
            /* We have a repetition count, so we do a number of steps one after the other.
             * This can effectively duplicate a slider.
             */
            for ( ; c>0; c--) {
               captures &= large_step_mask[d];
               captures = sshift128(captures, large_step_shift[d]);

               /* Scan all bits */
               large_bitboard_t bb = captures & (enemy | ep_capture) & destination_mask;
               while (!is_zero128(bb)) {
                  int to = bitscan128(bb);
                  int from = to - c*large_step_shift[d];
                  int ptaken = piece_for_side(get_piece(board, to), next_side[side_to_move]);
                  bb ^= large_square_bitboards[to];

                  /* Check for promotions */
                  if (!is_zero128(piece_types->piece_large_promotion_zone[n] & large_square_bitboards[to])) {
                     uint16_t c = piece_types->piece_promotion_choice[n] & allowed_promotion_pieces;
                     while (c) {
                        int tpiece = bitscan16(c);
                        c ^= 1<<tpiece;
                        if (piece_types->piece_maximum[tpiece][side_to_move] == 128 ||
                            popcount128(board->large_bbp[tpiece] & own) < piece_types->piece_maximum[tpiece][side_to_move]) {
                           tpiece = piece_for_side(tpiece, side_to_move);
                           *move = encode_capture_promotion(piece, from, to, ptaken, tpiece); move++;
                        }
                     }
                     /* If promotions are optional, we also encode a normal move */
                     if (!is_zero128(piece_types->piece_large_optional_promotion_zone[n]&large_square_bitboards[to])) {
                        if (!is_zero128(ep_capture & large_square_bitboards[to])) {
                           int ptaken = piece_for_side(get_piece(board, board->ep_capture), next_side[side_to_move]);
                           *move = encode_en_passant_capture(piece, from, to, ptaken, board->ep_capture);
                        } else {
                           *move = encode_normal_capture(piece, from, to, ptaken);
                        }
                        move++;
                     }
                  } else {
                     if (!is_zero128(ep_capture & large_square_bitboards[to])) {
                        int ptaken = piece_for_side(get_piece(board, board->ep_capture), next_side[side_to_move]);
                        *move = encode_en_passant_capture(piece, from, to, ptaken, board->ep_capture);
                     } else {
                        *move = encode_normal_capture(piece, from, to, ptaken);
                     }
                     move++;
                  }
               }
               captures &= ~occupied;
            }
         }
      }

      /* Castling
       * Because of the hassle when doing legality testing, we explicitly test whether castling is allowed in
       * the current position by testing for attacks on any of the critical squares. This is a hassle and
       * potentially slow, but only if castling may be possible in the current position.
       */
      if (expect(piece_types->piece_flags[n] & PF_CASTLE, false) && !is_zero128(board->large_init&bb)) {
         if (is_equal128(board->large_init & large_short_castle_mask[side_to_move], large_short_castle_mask[side_to_move])) {
            if (is_zero128(large_short_castle_free[side_to_move]) ||
                (is_zero128(occupied & large_short_castle_free[side_to_move]) &&
                 !is_zero128(destination_mask & large_short_castle_free[side_to_move]))) {
               large_bitboard_t test             = large_short_castle_safe[side_to_move];
               large_bitboard_t mask             = get_large_super_attacks_for_squares(test);
               large_bitboard_t attacked_squares = generate_large_attack_bitboard(board, test, mask, next_side[side_to_move]);
               if (is_zero128(attacked_squares & large_short_castle_safe[side_to_move])) {
                  int from1 = bitscan128(large_short_castle_mask[side_to_move] & bb);
                  int from2 = bitscan128(large_short_castle_mask[side_to_move] & ~bb);
                  int piece2 = piece_for_side(get_piece(board, from2), side_to_move);
                  int to1 = short_castle_king_dest[side_to_move];
                  *move = encode_castle_move(piece, from1, to1, piece2, from2, to1-1); move++;
               }
            }
         }

         if (is_equal128(board->large_init & large_long_castle_mask[side_to_move], large_long_castle_mask[side_to_move])) {
            if (is_zero128(large_long_castle_free[side_to_move]) ||
                (is_equal128(occupied & large_long_castle_free[side_to_move], large_board_empty) &&
                 !is_zero128(destination_mask & large_long_castle_free[side_to_move]))) {
               large_bitboard_t test             = large_long_castle_safe[side_to_move];
               large_bitboard_t mask             = get_large_super_attacks_for_squares(test);
               large_bitboard_t attacked_squares = generate_large_attack_bitboard(board, test, mask, next_side[side_to_move]);
               if (is_zero128(attacked_squares & large_long_castle_safe[side_to_move])) {
                  int from1 = bitscan128(large_long_castle_mask[side_to_move] & bb);
                  int from2 = bitscan128(large_long_castle_mask[side_to_move] & ~bb);
                  int piece2 = piece_for_side(get_piece(board, from2), side_to_move);
                  int to1 = long_castle_king_dest[side_to_move];
                  *move = encode_castle_move(piece, from1, to1, piece2, from2, to1+1); move++;
               }
            }
         }
      }

      /* Now determine slider and leaper moves for this piece type - if it has any */
      if ( (piece_move_flags[n] | piece_capture_flags[n] | special_move_flags[n]) & (MF_SLIDER|MF_IS_LEAPER)) {
         while (!is_zero128(bb)) {
            large_bitboard_t moves = large_board_empty;
            large_bitboard_t captures = large_board_empty;
            int from = bitscan128(bb);
            move_flag_t move_flags = piece_move_flags[n];
            bb ^= large_square_bitboards[from];

            /* Special moves; mainly for pawn-type pieces */
            /* FIXME: the normal move is disabled by this code, which is different from what is done
             * for steppers, and could be considered a bug.
             */
            if (!is_zero128(special_zone[n]&large_square_bitboards[from]))
               move_flags = special_move_flags[n];

            /* Determine the movement type for this piece */

            /* Get all regular moves
             * Test common combinations more efficiently.
             */
            switch (move_flags & (MF_SLIDER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
               case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                  moves |= get_large_aleaper_moves(move_flags, occupied, from, side_to_move);
                  break;
               case MF_IS_LEAPER:
                  moves |= get_large_leaper_moves(move_flags, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V):
                  moves |= get_large_rank_attacks(large_hmslider, occupied, from) |
                           get_large_file_attacks(large_vmslider, occupied, from);
                  break;
               case (MF_SLIDER_D | MF_SLIDER_A):
                  moves |= get_large_diagonal_attacks(large_hmslider, occupied, from) |
                           get_large_antidiagonal_attacks(large_hmslider, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                  moves |= get_large_rank_attacks(large_hmslider, occupied, from) |
                           get_large_file_attacks(large_vmslider, occupied, from) |
                           get_large_diagonal_attacks(large_hmslider, occupied, from) |
                           get_large_antidiagonal_attacks(large_hmslider, occupied, from);
                  break;
               default:
                  if (move_flags & MF_SLIDER_H) moves |= get_large_rank_attacks(large_hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_V) moves |= get_large_file_attacks(large_vmslider, occupied, from);
                  if (move_flags & MF_SLIDER_D) moves |= get_large_diagonal_attacks(large_hmslider, occupied, from);
                  if (move_flags & MF_SLIDER_A) moves |= get_large_antidiagonal_attacks(large_hmslider, occupied, from);
                  if (move_flags & MF_IS_LEAPER) moves |= get_large_leaper_moves(move_flags, occupied, from);
            }

            /* Get all regular captures
             * Test common combinations more efficiently.
             */
            // It should be possible to use the same table for slider moves and captures (because of the &
            // masks below), which would reduce the pressure on the processor cache and avoid some (expensivem
            // in 32 bit mode) calculations.
            if ( (move_flags & (MF_SLIDER | MF_HOPPER | MF_IS_LEAPER)) == (piece_capture_flags[n] & (MF_SLIDER | MF_HOPPER | MF_IS_LEAPER)) ) {
               captures = moves;
            } else {
            switch (piece_capture_flags[n] & (MF_SLIDER | MF_HOPPER | MF_IS_LEAPER | MF_LEAPER_ASYMM)) {
               case MF_IS_LEAPER | MF_LEAPER_ASYMM:
                  captures |= get_large_aleaper_moves(move_flags, occupied, from, side_to_move);
                  break;
               case MF_IS_LEAPER:
                  captures |= get_large_leaper_moves(move_flags, occupied, from);
                  break;
               case (MF_HOPPER_H | MF_HOPPER_V):
                  captures |= get_large_rank_attacks(large_hchopper, occupied, from) |
                              get_large_file_attacks(large_vchopper, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V):
                  captures |= get_large_rank_attacks(large_hcslider, occupied, from) |
                              get_large_file_attacks(large_vcslider, occupied, from);
                  break;
               case (MF_SLIDER_D | MF_SLIDER_A):
                  captures |= get_large_diagonal_attacks(large_hcslider, occupied, from) |
                              get_large_antidiagonal_attacks(large_hcslider, occupied, from);
                  break;
               case (MF_SLIDER_H | MF_SLIDER_V | MF_SLIDER_D | MF_SLIDER_A):
                  captures |= get_large_rank_attacks(large_hcslider, occupied, from) |
                              get_large_file_attacks(large_vcslider, occupied, from) |
                              get_large_diagonal_attacks(large_hcslider, occupied, from) |
                              get_large_antidiagonal_attacks(large_hcslider, occupied, from);
                  break;
               default:
                  if (piece_capture_flags[n] & MF_HOPPER_H) captures |= get_large_rank_attacks(large_hchopper, occupied, from);
                  if (piece_capture_flags[n] & MF_HOPPER_V) captures |= get_large_file_attacks(large_vchopper, occupied, from);
                  if (piece_capture_flags[n] & MF_HOPPER_D) captures |= get_large_diagonal_attacks(large_hchopper, occupied, from);
                  if (piece_capture_flags[n] & MF_HOPPER_A) captures |= get_large_antidiagonal_attacks(large_hchopper, occupied, from);
                  if (piece_capture_flags[n] & MF_SLIDER_H) captures |= get_large_rank_attacks(large_hcslider, occupied, from);
                  if (piece_capture_flags[n] & MF_SLIDER_V) captures |= get_large_file_attacks(large_vcslider, occupied, from);
                  if (piece_capture_flags[n] & MF_SLIDER_D) captures |= get_large_diagonal_attacks(large_hcslider, occupied, from);
                  if (piece_capture_flags[n] & MF_SLIDER_A) captures |= get_large_antidiagonal_attacks(large_hcslider, occupied, from);
                  if (piece_capture_flags[n] & MF_IS_LEAPER) captures |= get_large_leaper_moves(move_flags, occupied, from);
            }
            }


            /* Mask out occupied squares from normal moves, only capture enemy pieces */
            moves &= ~occupied;
            captures &= enemy;

            moves &= destination_mask;
            captures &= destination_mask;

            /* Loop over all bits to read off all destination squares */
            while (!is_zero128(moves)) {
               int to = bitscan128(moves);
               moves ^= large_square_bitboards[to];
               *move = encode_normal_move(piece, from, to); move++;
            }
            while (!is_zero128(captures)) {
               int to = bitscan128(captures);
               int ptaken = piece_for_side(get_piece(board, to), next_side[side_to_move]);
               captures ^= large_square_bitboards[to];
               *move = encode_normal_capture(piece, from, to, ptaken); move++;
            }
         }
      }
   }

done:

   /* Store the number of moves currently in the list */
   movelist->num_moves = (move - &(movelist->move[0]));
}

/* Fill movelist with all moves for side_to_move in the given position.
 *
 * The list is emptied first. All pieces may move; the destination squares are unrestricted unless the
 * board is flagged as being in check (see below). The small or large board generator is selected from
 * board->large_board.
 */
void generate_moves(movelist_t *movelist, const board_t *board, sides side_to_move)
{
   movelist->num_moves = 0;

   /* When in check, only generate moves in/to the area that can be reached by a superpiece standing in
    * the location of the king(s). Only those moves are candidates for resolving the check; everything
    * else would be pruned anyway.
    */
   if (board->large_board) {
      large_bitboard_t targets = large_board_all;

      if (board->in_check) {
         large_bitboard_t kings = board->large_royal & board->large_bbc[side_to_move];
         assert(!is_zero128(kings));

         targets = get_large_super_attacks_for_squares(kings);
      }

      generate_large_moves_mask(movelist, board, large_board_all, targets, side_to_move, 0xffff);
   } else {
      bitboard_t targets = board_all;

      if (board->in_check) {
         bitboard_t kings = board->royal & board->bbc[side_to_move];
         assert(kings);

         targets = get_super_attacks_for_squares(kings);
      }

      generate_moves_mask(movelist, board, board_all, targets, side_to_move, 0xffff);
   }

#if 0
   printf("%d\n", movelist->num_moves);
   int n;
   for (n=0; n<movelist->num_moves; n++) {
      printf("%s  ", move_string(movelist->move[n], NULL));
   }
   printf("\n");
#endif
}

/* Fill movelist with the moves considered during quiescence search.
 *
 * The list is emptied first. Two groups of moves are then appended:
 *  1. moves by any piece that end on a square occupied by the opponent, and
 *  2. for every piece type that has a promotion zone and is present on the board for side_to_move,
 *     moves by those pieces that end inside that zone on a square not occupied by the opponent.
 */
void generate_quiescence_moves(movelist_t *movelist, const board_t *board, sides side_to_move)
{
   int n;

   movelist->num_moves = 0;

   if (board->large_board) {
      const large_bitboard_t victims = board->large_bbc[next_side[side_to_move]];
      const large_bitboard_t friends = board->large_bbc[side_to_move];

      /* Captures */
      generate_large_moves_mask(movelist, board, large_board_all, victims, side_to_move, 0xffff);

      /* Promotion moves (non-captures) */
      for (n=0; n<board->piece_types->num_piece_types; n++) {
         large_bitboard_t zone = board->piece_types->piece_large_promotion_zone[n];
         large_bitboard_t promoters = board->large_bbp[n] & friends;

         if (is_zero128(zone) || is_zero128(promoters))
            continue;

         generate_large_moves_mask(movelist, board, promoters, (~victims) & zone, side_to_move, 0xffff);
      }
   } else {
      const bitboard_t victims = board->bbc[next_side[side_to_move]];
      const bitboard_t friends = board->bbc[side_to_move];

      /* Captures */
      generate_moves_mask(movelist, board, board_all, victims, side_to_move, 0xffff);

      /* Promotion moves (non-captures) */
      for (n=0; n<board->piece_types->num_piece_types; n++) {
         bitboard_t zone = board->piece_types->piece_promotion_zone[n];
         bitboard_t promoters = board->bbp[n] & friends;

         if (!zone || !promoters)
            continue;

         generate_moves_mask(movelist, board, promoters, (~victims) & zone, side_to_move, 0xffff);
      }
   }

#if 0
   print_bitboards(board);
   printf("%d\n", movelist->num_moves);
   for (n=0; n<movelist->num_moves; n++) {
      printf("%s  ", move_string(movelist->move[n], NULL));
   }
   printf("\n");
   printf_bitboard(board->bbp[next_side[side_to_move]]);
#endif
}

/* File-scope move list.
 * NOTE(review): nothing in this part of the file reads or writes it (validate_move() takes its own
 * `movelist` parameter, which hides this one) - confirm that it is still needed.
 */
static movelist_t movelist;
/* Convenience wrapper around generate_move_bitboard_mask() for the pieces in piece_mask of
 * side_to_move, with the whole board as the second mask and 0xffff as the last argument.
 *
 * FIXME: this only returns the bitboard for *captures*, which is not good enough if we want to evaluate
 * mobility properly!
 */
bitboard_t get_move_bitboard(const board_t *board, bitboard_t piece_mask, sides side_to_move)
{
   const bitboard_t whole_board = board_all;

   return generate_move_bitboard_mask(board, piece_mask, whole_board, side_to_move, 0xffff);
}

/* Convenience wrapper around generate_attack_bitboard_mask() for the pieces in piece_mask of
 * side_to_move, with the whole board as the second mask and 0xffff as the last argument.
 */
bitboard_t get_attack_bitboard(const board_t *board, bitboard_t piece_mask, sides side_to_move)
{
   const bitboard_t whole_board = board_all;

   return generate_attack_bitboard_mask(board, piece_mask, whole_board, side_to_move, 0xffff);
}

/* Large-board counterpart of get_attack_bitboard(): convenience wrapper around
 * generate_large_attack_bitboard_mask() with the whole large board as the second mask and 0xffff as the
 * last argument.
 */
large_bitboard_t get_large_attack_bitboard(const board_t *board, large_bitboard_t piece_mask, sides side_to_move)
{
   const large_bitboard_t whole_board = large_board_all;

   return generate_large_attack_bitboard_mask(board, piece_mask, whole_board, side_to_move, 0xffff);
}

/* Returns the index in the movelist of a valid move corresponding to the
 * given origin/destination. Intended for player interaction.
 * Returns -1 if no moves in the list match (ie, the requested move is
 * invalid). In the case of a promotion, there will be more than one move
 * that matches.
 */
int validate_move(const movelist_t *movelist, int from, int to)
{
   int n;
   if (!movelist)
      return -1;

   for (n=0; n<movelist->num_moves; n++) {
      if (get_move_from(movelist->move[n]) == from && get_move_to(movelist->move[n]) == to)
         return n;
   }
   return -1;
}

