4 files changed, 1748 insertions, 0 deletions
diff --git a/library/stdarch/examples/Cargo.toml b/library/stdarch/examples/Cargo.toml
new file mode 100644
index 000000000..e2590ed9f
--- /dev/null
+++ b/library/stdarch/examples/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "stdarch_examples"
+version = "0.0.0"
+authors = [
+    "Alex Crichton <alex@alexcrichton.com>",
+    "Andrew Gallant <jamslam@gmail.com>",
+    "Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>",
+]
+description = "Examples of the stdarch crate."
+edition = "2018"
+default-run = "hex"
+
+[dependencies]
+core_arch = { path = "../crates/core_arch" }
+std_detect = { path = "../crates/std_detect" }
+quickcheck = "0.9"
+rand = "0.7"
+
+[[bin]]
+name = "hex"
+path = "hex.rs"
+
+[[bin]]
+name = "connect5"
+path = "connect5.rs"
+
+[[example]]
+name = "wasm"
+crate-type = ["cdylib"]
+path = "wasm.rs"
diff --git a/library/stdarch/examples/connect5.rs b/library/stdarch/examples/connect5.rs
new file mode 100644
index 000000000..1b3325785
--- /dev/null
+++ b/library/stdarch/examples/connect5.rs
@@ -0,0 +1,1272 @@
+//! <b>Outer-Open Gomoku</b> is a board game which is a enhanced version of connect5 (Gomoku).\
+//! The game is a two-player game which played on a 15x15 Go board.\
+//! Two players take turns placing a move on an empty intersection in this board.\
+//! The winner is the first player to form an unbroken chain of five moves horizontally, vertically, or diagonally.\
+//! Unlike Gomoku, the first move is required to be placed at the two outer rows or columns of this board.\
+//! This program provides an AI playing with Minimax search with alpha-beta pruning which uses
+//! patterns on evaluation.\
+//! The avx512 intrinsic can do 32 pattern matching at one time.\
+//! This avx512 is tested with non-avx512 code to verify its correctness.\
+//!
+//! On Intel i7-7800x using single thread with fixed AVX-512 clock at 4.0GHz, the avx512 is speed up about 9x.\
+//! The average time for each move in the avx512 is around 14.00s <span>&#177;</span> 1.31s and in the non-avx512
+//! is 129.02s <span>&#177;</span> 4.96s.\
+//! On Intel Tiger Lake i7-1165G7, the avx512 is around 11.11s <span>&#177;</span> 1.31s.
+//!
+//! <b>Pattern Matching</b>\
+//! Use 512-bit to present the board state. The location 0 is top left.\
+//! 0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  <b>15</b>\
+//! 16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  <b>31</b>\
+//! ...\
+//! Pattern "OOOOO" is matching through "0 1 2 3 4", "1 2 3 4 5", ...\
+//! Using avx512, "0 1 2 3 4", "16 17 18 19 20", ... can be matched simultaneously.\
+//!
+//! //! You can test out this program via:
+//!
+//!     cargo +nightly run --release --bin connect5
+//!
+//! You should see a game self-playing. In the end of the game, it shows the average time for
+//! each move.
+
+#![feature(stdsimd, avx512_target_feature)]
+#![feature(stmt_expr_attributes)]
+
+use rand::seq::SliceRandom;
+use rand::thread_rng;
+
+use std::cmp;
+use std::time::Instant;
+
+#[cfg(target_arch = "x86")]
+use {core_arch::arch::x86::*, std_detect::is_x86_feature_detected};
+#[cfg(target_arch = "x86_64")]
+use {core_arch::arch::x86_64::*, std_detect::is_x86_feature_detected};
+
+// types
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum Color {
+    Black = 0,
+    White = 1,
+    Empty = 2,
+    Border = 3,
+}
+
+type Square = i32;
+type Move = i32;
+type Side = Color;
+
+// constants
+
+const FILE_SIZE: i32 = 15;
+const RANK_SIZE: i32 = 15;
+const SQUARE_SIZE: i32 = (FILE_SIZE + 1) * (FILE_SIZE + 4) + 16 + 4;
+
+const EVAL_INF: i32 = FILE_SIZE * RANK_SIZE * 100;
+const MOVE_NONE: Move = -1;
+const SCORE_NONE: i32 = -EVAL_INF - 1;
+
+/// DIRECTION 0: left to right\
+/// DIRECTION 1: top to bottom\
+/// DIRECTION 2: top left to bottom right\
+/// DIRECTION 3: top right to bottom left
+#[rustfmt::skip]
+const DIRECTION: [[i32; 5]; 4] = [ [1, 2, 3, 4, 5],
+                                   [1 * (FILE_SIZE + 1), 2 * (FILE_SIZE + 1), 3 * (FILE_SIZE + 1), 4 * (FILE_SIZE + 1), 5 * (FILE_SIZE + 1)],
+                                   [1 * (FILE_SIZE + 2), 2 * (FILE_SIZE + 2), 3 * (FILE_SIZE + 2), 4 * (FILE_SIZE + 2), 5 * (FILE_SIZE + 2)],
+                                   [1 * (FILE_SIZE + 0), 2 * (FILE_SIZE + 0), 3 * (FILE_SIZE + 0), 4 * (FILE_SIZE + 0), 5 * (FILE_SIZE + 0)]];
+
+/// A table to encode each location to a value in bit 31-0 in the bitboard for 4 direction
+#[rustfmt::skip]
+const MAPMOVEVALUE: [[i32; 239]; 4] = [ [// Direction 0 
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0,
+                                         1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17],
+                                        [// Direction 1
+                                         1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 0,
+                                         1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 0,
+                                         1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 0,
+                                         1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 0,
+                                         1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 0,
+                                         1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 0,
+                                         1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 0,
+                                         1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 0,
+                                         1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 0,
+                                         1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 0,
+                                         1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 0,
+                                         1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 0,
+                                         1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 0,
+                                         1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 0,
+                                         1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17],
+                                        [// Direction 2 
+                                         1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 0,     0,     0,     0,     0,
+                                         1<<15, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 0,     0,     0,     0,
+                                         1<<15, 1<<14, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 0,     0,     0,
+                                         1<<15, 1<<14, 1<<13, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 0,     0,
+                                         1<<15, 1<<14, 1<<13, 1<<12, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 0,
+                                         1<<15, 1<<14, 1<<13, 1<<12, 1<<11, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 0,
+                                         1<<9,  1<<14, 1<<13, 1<<12, 1<<11, 1<<10, 1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  0,
+                                         1<<8,  1<<8,  1<<13, 1<<12, 1<<11, 1<<10, 1<<9,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  0,
+                                         1<<7,  1<<7,  1<<7,  1<<12, 1<<11, 1<<10, 1<<9,  1<<8,  1<<7,  1<<7,  1<<7,  1<<7,  1<<7,  1<<7,  1<<7,  0,
+                                         1<<6,  1<<6,  1<<6,  1<<6,  1<<11, 1<<10, 1<<9,  1<<8,  1<<7,  1<<6,  1<<6,  1<<6,  1<<6,  1<<6,  1<<6,  0,
+                                         1<<5,  1<<5,  1<<5,  1<<5,  1<<5,  1<<10, 1<<9,  1<<8,  1<<7,  1<<6,  1<<5,  1<<5,  1<<5,  1<<5,  1<<5,  0,
+                                         0,     1<<4,  1<<4,  1<<4,  1<<4,  1<<4,  1<<9,  1<<8,  1<<7,  1<<6,  1<<5,  1<<4,  1<<4,  1<<4,  1<<4,  0,
+                                         0,     0,     1<<3,  1<<3,  1<<3,  1<<3,  1<<3,  1<<8,  1<<7,  1<<6,  1<<5,  1<<4,  1<<3,  1<<3,  1<<3,  0,
+                                         0,     0,     0,     1<<2,  1<<2,  1<<2,  1<<2,  1<<2,  1<<7,  1<<6,  1<<5,  1<<4,  1<<3,  1<<2,  1<<2,  0,
+                                         0,     0,     0,     0,     1<<1,  1<<1,  1<<1,  1<<1,  1<<1,  1<<6,  1<<5,  1<<4,  1<<3,  1<<2,  1<<1],
+                                        [// Direction 3
+                                         0,     0,     0,     0,     1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 0,
+                                         0,     0,     0,     1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<15, 0,
+                                         0,     0,     1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<14, 1<<15, 0,
+                                         0,     1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<13, 1<<14, 1<<15, 0,
+                                         1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<12, 1<<13, 1<<14, 1<<15, 0,
+                                         1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14, 1<<15, 0,
+                                         1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<9,  1<<10, 1<<11, 1<<12, 1<<13, 1<<14, 1<<9,  0,
+                                         1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<8,  1<<9,  1<<10, 1<<11, 1<<12, 1<<13, 1<<8,  1<<8,  0,
+                                         1<<7,  1<<7,  1<<7,  1<<7,  1<<7,  1<<7,  1<<7,  1<<8,  1<<9,  1<<10, 1<<11, 1<<12, 1<<7,  1<<7,  1<<7,  0,
+                                         1<<6,  1<<6,  1<<6,  1<<6,  1<<6,  1<<6,  1<<7,  1<<8,  1<<9,  1<<10, 1<<11, 1<<6,  1<<6,  1<<6,  1<<6,  0,
+                                         1<<5,  1<<5,  1<<5,  1<<5,  1<<5,  1<<6,  1<<7,  1<<8,  1<<9,  1<<10, 1<<5,  1<<5,  1<<5,  1<<5,  1<<5,  0,
+                                         1<<4,  1<<4,  1<<4,  1<<4,  1<<5,  1<<6,  1<<7,  1<<8,  1<<9,  1<<4,  1<<4,  1<<4,  1<<4,  1<<4,  0,     0,
+                                         1<<3,  1<<3,  1<<3,  1<<4,  1<<5,  1<<6,  1<<7,  1<<8,  1<<3,  1<<3,  1<<3,  1<<3,  1<<3,  0,     0,     0,
+                                         1<<2,  1<<2,  1<<3,  1<<4,  1<<5,  1<<6,  1<<7,  1<<2,  1<<2,  1<<2,  1<<2,  1<<2,  0,     0,     0,     0,
+                                         1<<1,  1<<2,  1<<3,  1<<4,  1<<5,  1<<6,  1<<1,  1<<1,  1<<1,  1<<1,  1<<1,  0,     0,     0,     0]
+                                        ];
+
+/// A table to encode each location to an index in the bitboard for 4 direction 
+#[rustfmt::skip]
+const MAPMOVEIDX: [[i32; 239]; 4] = [ [// Direction 0 
+                                       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                                       1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,
+                                       2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0,
+                                       3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  0,
+                                       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  0,
+                                       5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  0,
+                                       6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  0,
+                                       7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  0,
+                                       8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  0,
+                                       9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  0,
+                                       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0,
+                                       11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0,
+                                       12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0,
+                                       13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0,
+                                       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14],
+                                      [// Direction 1 
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+                                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+                                      [// Direction 2
+                                       10, 9,  8,   7,  6,  5,  4,  3,  2,  1,  0,  0,  0,  0,  0,  0,
+                                       11, 10, 9,   8,  7,  6,  5,  4,  3,  2,  1,  0,  0,  0,  0,  0,
+                                       12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0,  0,  0,  0,
+                                       13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0,  0,  0,
+                                       14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0,  0,
+                                       15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0,
+                                        1, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  0,
+                                        2,  1, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  0,
+                                        3,  2,  1, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  0,
+                                        4,  3,  2,  1, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  0,
+                                        5,  4,  3,  2,  1, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  0,
+                                        0,  5,  4,  3,  2,  1, 15, 14, 13, 12, 11, 10,  9,  8,  7,  0,
+                                        0,  0,  5,  4,  3,  2,  1, 15, 14, 13, 12, 11, 10,  9,  8,  0,
+                                        0,  0,  0,  5,  4,  3,  2,  1, 15, 14, 13, 12, 11, 10,  9,  0,
+                                        0,  0,  0,  0,  5,  4,  3,  2,  1, 15, 14, 13, 12, 11, 10],
+                                      [// Direction 3
+                                       0,  0,  0,  0,   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,
+                                       0,  0,  0,  0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,
+                                       0,  0,  0,  1,   2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  0,
+                                       0,  0,  1,  2,   3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  0,
+                                       0,  1,  2,  3,   4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  0,
+                                       1,  2,  3,  4,   5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,  0,
+                                       2,  3,  4,  5,   6,  7,  8,  9, 10, 11, 12, 13, 14, 15,  1,  0,
+                                       3,  4,  5,  6,   7,  8,  9, 10, 11, 12, 13, 14, 15,  1,  2,  0,
+                                       4,  5,  6,  7,   8,  9, 10, 11, 12, 13, 14, 15,  1,  2,  3,  0,
+                                       5,  6,  7,  8,   9, 10, 11, 12, 13, 14, 15,  1,  2,  3,  4,  0,
+                                       6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  1,  2,  3,  4,  5,  0,
+                                       7,  8,  9,  10, 11, 12, 13, 14, 15,  1,  2,  3,  4,  5,  0,  0,
+                                       8,  9,  10, 11, 12, 13, 14, 15,  1,  2,  3,  4,  5,  0,  0,  0,
+                                       9,  10, 11, 12, 13, 14, 15,  1,  2,  3,  4,  5,  0,  0,  0,  0,
+                                       10, 11, 12, 13, 14, 15,  1,  2,  3,  4,  5,  0,  0,  0,  0]
+                                ];
+
+// structures
+
+/// Use one-dimensional array to store the board state. The location 0 is top left.\
+/// 0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  <b>15</b>\
+/// 16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  <b>31</b>\
+/// ... \
+/// position 15, 31, ... are Borders.\
+/// position 0 is file 0, rank 0.\
+/// position 17 is file 1, rank 1.\
+///
+/// Use a three-dimensional array to store the bitboard.\
+/// The first dimension is color: Black, White and Empty.\
+/// The second and third one are 2 x 512-bit. Direction 0 and 2 use the first 512-bit. Direction 1 and
+/// 3 use the second 512-bit.\
+/// Each 512-bit is a 32-bit x 16 array. Direction 0 and 1 store at bit 31-16 and Direction 2 and 3 store at bit 15-0.  
+
+pub struct Pos {
+    // position
+    state: [Color; SQUARE_SIZE as usize],
+    p_turn: Side,
+    bitboard: [[[i32; 16]; 2]; 3],
+}
+
+impl Pos {
+    pub fn init(&mut self) {
+        // starting position
+        // Set up the Border
+        for i in 0..SQUARE_SIZE as usize {
+            self.state[i] = Color::Border;
+        }
+
+        // In the beginning, all is Empty
+        for rk in 0..RANK_SIZE {
+            for fl in 0..FILE_SIZE {
+                let sq: Square = square_make(fl, rk);
+                self.state[sq as usize] = Color::Empty;
+            }
+        }
+
+        // first move is Black
+        self.p_turn = Color::Black;
+
+        let black = Color::Black as usize;
+        let white = Color::White as usize;
+        let empty = Color::Empty as usize;
+
+        // set up the corresponding bitboard
+        for i in 0..2 {
+            for j in 0..16 {
+                self.bitboard[black][i][j] = 0;
+                self.bitboard[white][i][j] = 0;
+                self.bitboard[empty][i][j] = 0;
+            }
+        }
+
+        for i in 0..2 {
+            // use bit 31-16 to store direction 0 and 1
+            #[rustfmt::skip]
+            for j in 0..FILE_SIZE as usize {
+                self.bitboard[empty][i][j] = (1<<31)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<24)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17);
+            }
+        }
+
+        // use bit 15-0 to store direction 2 and 3. There are 21 for each one. We combine row1 and row16, row2 and row17, row3 and row18, row4 and row19, and row 5 and row20
+        #[rustfmt::skip]
+        for i in 0..2 {
+            self.bitboard[empty][i][0]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11); //row 0
+            self.bitboard[empty][i][1]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)/*row1*/|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row16
+            self.bitboard[empty][i][2]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)/*row2*/|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row17
+            self.bitboard[empty][i][3]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)/*row3*/|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row18
+            self.bitboard[empty][i][4]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)/*row4*/|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row19
+            self.bitboard[empty][i][5]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)/*row5*/|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row20
+            self.bitboard[empty][i][6]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5);//row6
+            self.bitboard[empty][i][7]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4);//row7
+            self.bitboard[empty][i][8]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3);//row8
+            self.bitboard[empty][i][9]  |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2);//row9
+            self.bitboard[empty][i][10] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row10
+            self.bitboard[empty][i][11] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2);//row11
+            self.bitboard[empty][i][12] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3);//row12
+            self.bitboard[empty][i][13] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4);//row13
+            self.bitboard[empty][i][14] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5);//row14
+            self.bitboard[empty][i][15] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6);//row15
+        }
+    }
+
+    pub fn do_move(&mut self, mv: Move) {
+        let atk: Side = self.p_turn;
+        let def: Side = side_opp(atk);
+
+        let mv = mv as usize;
+        let black = Color::Black as usize;
+        let white = Color::White as usize;
+        let empty = Color::Empty as usize;
+
+        match self.p_turn {
+            Color::Black => {
+                self.state[mv as usize] = Color::Black;
+                // update black move and remove empty move in bitboard
+                self.bitboard[black][0][MAPMOVEIDX[0][mv] as usize] |= MAPMOVEVALUE[0][mv];
+                self.bitboard[empty][0][MAPMOVEIDX[0][mv] as usize] ^= MAPMOVEVALUE[0][mv];
+                self.bitboard[black][1][MAPMOVEIDX[1][mv] as usize] |= MAPMOVEVALUE[1][mv];
+                self.bitboard[empty][1][MAPMOVEIDX[1][mv] as usize] ^= MAPMOVEVALUE[1][mv];
+                self.bitboard[black][0][MAPMOVEIDX[2][mv] as usize] |= MAPMOVEVALUE[2][mv];
+                self.bitboard[empty][0][MAPMOVEIDX[2][mv] as usize] ^= MAPMOVEVALUE[2][mv];
+                self.bitboard[black][1][MAPMOVEIDX[3][mv] as usize] |= MAPMOVEVALUE[3][mv];
+                self.bitboard[empty][1][MAPMOVEIDX[3][mv] as usize] ^= MAPMOVEVALUE[3][mv];
+            }
+            Color::White => {
+                self.state[mv as usize] = Color::White;
+                // update white move and remove empty move in bitboard
+                self.bitboard[white][0][MAPMOVEIDX[0][mv] as usize] |= MAPMOVEVALUE[0][mv];
+                self.bitboard[empty][0][MAPMOVEIDX[0][mv] as usize] ^= MAPMOVEVALUE[0][mv];
+                self.bitboard[white][1][MAPMOVEIDX[1][mv] as usize] |= MAPMOVEVALUE[1][mv];
+                self.bitboard[empty][1][MAPMOVEIDX[1][mv] as usize] ^= MAPMOVEVALUE[1][mv];
+                self.bitboard[white][0][MAPMOVEIDX[2][mv] as usize] |= MAPMOVEVALUE[2][mv];
+                self.bitboard[empty][0][MAPMOVEIDX[2][mv] as usize] ^= MAPMOVEVALUE[2][mv];
+                self.bitboard[white][1][MAPMOVEIDX[3][mv] as usize] |= MAPMOVEVALUE[3][mv];
+                self.bitboard[empty][1][MAPMOVEIDX[3][mv] as usize] ^= MAPMOVEVALUE[3][mv];
+            }
+            _ => panic! {},
+        }
+
+        self.p_turn = def;
+    }
+
+    fn turn(&self) -> Side {
+        self.p_turn
+    }
+
+    pub fn can_play(&self, from: Square) -> bool {
+        if self.state[from as usize] == Color::Empty {
+            true
+        } else {
+            false
+        }
+    }
+}
+
+pub struct List {
+    // legal move list
+    p_move: [Move; (FILE_SIZE * RANK_SIZE) as usize],
+    p_size: i32,
+}
+
+/// Use List to store legal moves.
+impl List {
+    pub fn clear(&mut self) {
+        self.p_size = 0;
+    }
+
+    pub fn add(&mut self, mv: Move) {
+        self.p_move[self.p_size as usize] = mv;
+        self.p_size += 1;
+    }
+
+    pub fn size(&self) -> i32 {
+        self.p_size
+    }
+
+    pub fn shuffle(&mut self) {
+        let mut rng = thread_rng();
+        let num = self.p_size;
+        let mut new_move: Vec<Move> = vec![];
+
+        for x in 0..(num as usize) {
+            new_move.push(self.p_move[x]);
+        }
+
+        new_move.shuffle(&mut rng);
+
+        for x in 0..(self.p_size as usize) {
+            self.p_move[x] = new_move[x];
+        }
+    }
+}
+
+// functions
+
+fn square_make(fl: i32, rk: i32) -> Square {
+    rk * (FILE_SIZE + 1) + fl
+}
+
+fn side_opp(sd: Side) -> Side {
+    match sd {
+        Side::White => Side::Black,
+        Side::Black => Side::White,
+        _ => panic!(""),
+    }
+}
+
+fn pos_is_winner(pos: &Pos) -> bool {
+    let current_side = side_opp(pos.p_turn);
+    check_pattern5(&pos, current_side)
+}
+
+fn pos_is_draw(pos: &Pos) -> bool {
+    let mut found: bool = true;
+
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+            if pos.can_play(sq) {
+                found = false;
+                break;
+            }
+
+            if found == false {
+                break;
+            }
+        }
+    }
+
+    let mut out: bool = false;
+    if found == true && !pos_is_winner(pos) {
+        out = true;
+    }
+
+    out
+}
+
+#[target_feature(enable = "avx512f,avx512bw")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn pos_is_draw_avx512(pos: &Pos) -> bool {
+    let empty = Color::Empty as usize;
+
+    let board0org = _mm512_loadu_epi32(&pos.bitboard[empty][0][0]);
+
+    let answer = _mm512_set1_epi32(0);
+
+    // if all empty is 0, all board is filled.
+    let temp_mask = _mm512_mask_cmpneq_epi32_mask(0b11111111_11111111, answer, board0org);
+
+    if _popcnt32(temp_mask as i32) == 0 && !pos_is_winner_avx512(pos) {
+        return true;
+    } else {
+        return false;
+    }
+}
+
+fn pos_is_end(pos: &Pos) -> bool {
+    if pos_is_winner(pos) || pos_is_draw(pos) {
+        true
+    } else {
+        false
+    }
+}
+
+fn pos_disp(pos: &Pos) {
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+
+            match pos.state[sq as usize] {
+                Color::Black => print!("# "),
+                Color::White => print!("O "),
+                Color::Empty => print!("- "),
+                Color::Border => print!("| "),
+            }
+        }
+
+        println!("");
+    }
+
+    match pos.turn() {
+        Color::Black => println!("black to play"),
+        Color::White => println!("white to play"),
+        _ => panic!(),
+    }
+}
+
+fn gen_moves(list: &mut List, pos: &Pos) {
+    list.clear();
+
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+            if pos.can_play(sq) {
+                list.add(sq);
+            }
+        }
+    }
+}
+
+/// AI: use Minimax search with alpha-beta pruning
+fn search(pos: &Pos, alpha: i32, beta: i32, depth: i32, _ply: i32) -> i32 {
+    assert!(-EVAL_INF <= alpha && alpha < beta && beta <= EVAL_INF);
+    // leaf?
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx512bw") {
+            unsafe {
+                if pos_is_winner_avx512(&pos) {
+                    return -EVAL_INF + _ply;
+                }
+
+                if pos_is_draw_avx512(&pos) {
+                    return 0;
+                }
+            }
+        } else {
+            if pos_is_winner(&pos) {
+                return -EVAL_INF + _ply;
+            }
+
+            if pos_is_draw(&pos) {
+                return 0;
+            }
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        if pos_is_winner(&pos) {
+            return -EVAL_INF + _ply;
+        }
+
+        if pos_is_draw(&pos) {
+            return 0;
+        }
+    }
+
+    if depth == 0 {
+        return eval(&pos, _ply);
+    }
+
+    let p_move_new: [Move; (FILE_SIZE * RANK_SIZE) as usize] =
+        [0; (FILE_SIZE * RANK_SIZE) as usize];
+
+    let mut list = List {
+        p_move: p_move_new,
+        p_size: 0,
+    };
+
+    let mut bm: Move = MOVE_NONE;
+    let mut bs: i32 = SCORE_NONE;
+
+    gen_moves(&mut list, &pos);
+
+    // move loop
+
+    if _ply == 0 {
+        list.shuffle();
+    }
+
+    for i in 0..list.size() {
+        if bs < beta {
+            let mv: Move = list.p_move[i as usize];
+
+            let mut new_pos = Pos {
+                state: pos.state,
+                p_turn: pos.p_turn,
+                bitboard: pos.bitboard,
+            };
+
+            new_pos.do_move(mv);
+
+            let sc: i32 = -search(&new_pos, -beta, -cmp::max(alpha, bs), depth - 1, _ply + 1);
+
+            if sc > bs {
+                bm = mv;
+                bs = sc;
+            }
+        }
+    }
+
+    assert!(bm != MOVE_NONE);
+    assert!(bs >= -EVAL_INF && bs <= EVAL_INF);
+
+    if _ply == 0 {
+        bm
+    } else {
+        bs
+    } //best move at the root node, best score elsewhere
+}
+
+/// Evaluation function: give different scores to different patterns after a fixed depth.
+fn eval(pos: &Pos, _ply: i32) -> i32 {
+    let atk: Side = pos.turn();
+    let def: Side = side_opp(atk);
+
+    // check if opp has live4 which will win playing next move
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx512bw") {
+            unsafe {
+                if check_patternlive4_avx512(&pos, def) {
+                    return -4096;
+                }
+            }
+        } else {
+            if check_patternlive4(&pos, def) {
+                return -4096;
+            }
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        if check_patternlive4(&pos, def) {
+            return -4096;
+        }
+    }
+
+    // check if self has live4 which will win playing next move
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx512bw") {
+            unsafe {
+                if check_patternlive4_avx512(&pos, atk) {
+                    return 2560;
+                }
+            }
+        } else {
+            if check_patternlive4(&pos, atk) {
+                return 2560;
+            }
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        if check_patternlive4(&pos, atk) {
+            return 2560;
+        }
+    }
+
+    // check if self has dead4 which will win playing next move
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx512bw") {
+            unsafe {
+                if check_patterndead4_avx512(&pos, atk) > 0 {
+                    return 2560;
+                }
+            }
+        } else {
+            if check_patterndead4(&pos, atk) > 0 {
+                return 2560;
+            }
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        if check_patterndead4(&pos, atk) > 0 {
+            return 2560;
+        }
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx512bw") {
+            unsafe {
+                let n_c4: i32 = check_patterndead4_avx512(&pos, def);
+                let n_c3: i32 = check_patternlive3_avx512(&pos, def);
+
+                // check if opp has 2 dead4 which will win playing next move
+                if n_c4 > 1 {
+                    return -2048;
+                }
+
+                // check if opp has a dead 4 and live 3 which will win playing the next two move
+                if n_c4 == 1 && n_c3 > 0 {
+                    return -2048;
+                }
+
+                if check_patternlive3_avx512(&pos, atk) > 1 {
+                    return 2560;
+                }
+
+                // check if opp has 2 live3 which will win playing the next two move
+                if n_c3 > 1 {
+                    return -2048;
+                }
+            }
+        } else {
+            let n_c4: i32 = check_patterndead4(&pos, def);
+            let n_c3: i32 = check_patternlive3(&pos, def);
+
+            // check if opp has 2 dead4 which will win playing next move
+            if n_c4 > 1 {
+                return -2048;
+            }
+
+            // check if opp has a dead 4 and live 3 which will win playing the next two move
+            if n_c4 == 1 && n_c3 > 0 {
+                return -2048;
+            }
+
+            // check if self has 2 live3 which will win playing the next two move
+            if check_patternlive3(&pos, atk) > 1 {
+                return 2560;
+            }
+
+            // check if opp has 2 live3 which will win playing the next two move
+            if n_c3 > 1 {
+                return -2048;
+            }
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        let n_c4: i32 = check_patterndead4(&pos, def);
+        let n_c3: i32 = check_patternlive3(&pos, def);
+
+        // check if opp has 2 dead4 which will win playing next move
+        if n_c4 > 1 {
+            return -2048;
+        }
+
+        // check if opp has a dead 4 and live 3 which will win playing the next two move
+        if n_c4 == 1 && n_c3 > 0 {
+            return -2048;
+        }
+
+        // check if self has 2 live3 which will win playing the next two move
+        if check_patternlive3(&pos, atk) > 1 {
+            return 2560;
+        }
+
+        // check if opp has 2 live3 which will win playing the next two move
+        if n_c3 > 1 {
+            return -2048;
+        }
+    }
+
+    0
+}
+
+/// Check <b>OOOOO</b>
+fn check_pattern5(pos: &Pos, sd: Side) -> bool {
+    let mut n: i32 = 0;
+
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+
+            for pat in 0..4 {
+                let idx0 = sq;
+                let idx1 = sq + DIRECTION[pat][0];
+                let idx2 = sq + DIRECTION[pat][1];
+                let idx3 = sq + DIRECTION[pat][2];
+                let idx4 = sq + DIRECTION[pat][3];
+
+                let val0 = pos.state[idx0 as usize];
+                let val1 = pos.state[idx1 as usize];
+                let val2 = pos.state[idx2 as usize];
+                let val3 = pos.state[idx3 as usize];
+                let val4 = pos.state[idx4 as usize];
+
+                #[rustfmt::skip]
+                if val0 == sd && val1 == sd && val2 == sd && val3 == sd && val4 == sd { n += 1; }
+            }
+        }
+    }
+
+    if n > 0 {
+        true
+    } else {
+        false
+    }
+}
+
+/// Check <b>-OOOO-</b>
+fn check_patternlive4(pos: &Pos, sd: Side) -> bool {
+    let mut n: i32 = 0;
+
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+
+            for pat in 0..4 {
+                let idx0 = sq;
+                let idx1 = sq + DIRECTION[pat][0];
+                let idx2 = sq + DIRECTION[pat][1];
+                let idx3 = sq + DIRECTION[pat][2];
+                let idx4 = sq + DIRECTION[pat][3];
+                let idx5 = sq + DIRECTION[pat][4];
+
+                let val0 = pos.state[idx0 as usize];
+                let val1 = pos.state[idx1 as usize];
+                let val2 = pos.state[idx2 as usize];
+                let val3 = pos.state[idx3 as usize];
+                let val4 = pos.state[idx4 as usize];
+                let val5 = pos.state[idx5 as usize];
+
+                #[rustfmt::skip]
+                if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == sd && val4 == sd && val5 == Color::Empty { n += 1; }
+            }
+        }
+    }
+
+    if n > 0 {
+        true
+    } else {
+        false
+    }
+}
+
+/// Check <b>OOOO-, OOO-O, OO-OO, O-OOO, -OOOO</b>
+fn check_patterndead4(pos: &Pos, sd: Side) -> i32 {
+    let mut n: i32 = 0;
+
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+
+            for dir in 0..4 {
+                let idx0 = sq;
+                let idx1 = sq + DIRECTION[dir][0];
+                let idx2 = sq + DIRECTION[dir][1];
+                let idx3 = sq + DIRECTION[dir][2];
+                let idx4 = sq + DIRECTION[dir][3];
+
+                let val0 = pos.state[idx0 as usize];
+                let val1 = pos.state[idx1 as usize];
+                let val2 = pos.state[idx2 as usize];
+                let val3 = pos.state[idx3 as usize];
+                let val4 = pos.state[idx4 as usize];
+
+                #[rustfmt::skip]
+                if val0 == sd && val1 == sd && val2 == sd && val3 == sd && val4 == Color::Empty { n += 1; }
+                #[rustfmt::skip]
+                if val0 == sd && val1 == sd && val2 == sd && val3 == Color::Empty && val4 == sd { n += 1; }
+                #[rustfmt::skip]
+                if val0 == sd && val1 == sd && val2 == Color::Empty && val3 == sd && val4 == sd { n += 1; }
+                #[rustfmt::skip]
+                if val0 == sd && val1 == Color::Empty && val2 == sd && val3 == sd && val4 == sd { n += 1; }
+                #[rustfmt::skip]
+                if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == sd && val4 == sd { n += 1; }
+            }
+        }
+    }
+
+    n
+}
+
+/// Check <b>-OOO-, -OO-O-, -O-OO-</br>
+fn check_patternlive3(pos: &Pos, sd: Side) -> i32 {
+    let mut n: i32 = 0;
+
+    for rk in 0..RANK_SIZE {
+        for fl in 0..FILE_SIZE {
+            let sq: Square = square_make(fl, rk);
+
+            for dir in 0..4 {
+                let idx0 = sq;
+                let idx1 = sq + DIRECTION[dir][0];
+                let idx2 = sq + DIRECTION[dir][1];
+                let idx3 = sq + DIRECTION[dir][2];
+                let idx4 = sq + DIRECTION[dir][3];
+                let idx5 = sq + DIRECTION[dir][4];
+
+                let val0 = pos.state[idx0 as usize];
+                let val1 = pos.state[idx1 as usize];
+                let val2 = pos.state[idx2 as usize];
+                let val3 = pos.state[idx3 as usize];
+                let val4 = pos.state[idx4 as usize];
+                let val5 = pos.state[idx5 as usize];
+
+                #[rustfmt::skip]
+                if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == sd && val4 == Color::Empty { n +=1; }
+                #[rustfmt::skip]
+                if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == Color::Empty && val4 == sd && val5 == Color::Empty { n += 1; }
+                #[rustfmt::skip]
+                if val0 == Color::Empty && val1 == sd && val2 == Color::Empty && val3 == sd && val4 == sd && val5 == Color::Empty { n += 1; }
+            }
+        }
+    }
+
+    n
+}
+
+#[target_feature(enable = "avx512f,avx512bw")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn pos_is_winner_avx512(pos: &Pos) -> bool {
+    let current_side = side_opp(pos.p_turn);
+    let coloridx = current_side as usize;
+
+    let board0org: [__m512i; 2] = [
+        _mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]),
+        _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0]),
+    ]; // load states from bitboard
+
+    #[rustfmt::skip]
+    let answer = _mm512_set1_epi16((1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)); // an unbroken chain of five moves
+
+    // use Mask to filter out which data is not processed.
+    //    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
+    // 1  x x x x _ _ _ _ _ _  _  _  _  _  _  0  x  o  x  o  x  0  0  0  0  0  0  0  0  0  0  0
+    // 2  x _ _ _ _ o _ x o _  _  _  _  _  _  0  x  o  _  _  _  _  _| x  x  o  o  o  x  x  _  _
+    // .  ...
+    // .  ...
+    // .  ...
+    // 16 0 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  x  o  x  o  o  o  o  o  o  o  0  0  0  0  0  0
+    //
+    // answer_mask[0]: 01_11..............: "0" is in row 16 and column 1-16.
+    // There is no data to match (x = black, o = white, _ = empty, 0 = no data).
+    //
+    //
+    // Then, shift one space left.
+    //    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
+    // 1  x x x _ _ _ _ _ _ _  _  _  _  _  0  x  o  x  o  x  0  0  0  0  0  0  0  0  0  0  0  0
+    // .  ...
+    // .  ...
+    // .  ...
+    // 16 0 0 0 0 0 0 0 0 0 0  0  0  0  0  0  x  o  x  o  o  o  o  o  o  o  0  0  0  0  0  0  0
+    // answer_mask[1]: ................_10: "0" is in row 1 and column 17-32;
+    // There is no enough data to match (o x o x but we want to match o o o o o).
+    //
+    // answer_mask[2]: mix 2 data together (column 17-23 and column 24-32). Using Mask to make it match correctly.
+    // For example, column 23,24,25,26,27 is not a pattern and 24,25,26,27,28 is a pattern.
+    // That is why some mask bits are set to 0 from answer_mask[2] to answer_mask[10].
+
+    #[rustfmt::skip]
+    let answer_mask: [__mmask32; 11] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10,
+                                        0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_11_10,
+                                        0b00_10_11_11_11_11_11_11_11_10_10_10_10_11_11_10,
+                                        0b00_10_10_11_11_11_11_11_10_10_10_10_11_11_11_10,
+                                        0b00_10_10_10_11_11_11_10_10_10_10_11_11_11_11_10,
+                                        0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10];
+    let mut count_match: i32 = 0;
+
+    for dir in 0..2 {
+        // direction 0 and 1
+        let mut board0 = board0org[dir];
+        let boardf = _mm512_and_si512(answer, board0);
+        let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
+        count_match += _popcnt32(temp_mask as i32);
+
+        for i in 1..11 {
+            // OOOOOOOOOOO----, the last 4 "-" cannot make an unbroken chain of five.
+            board0 = _mm512_slli_epi32(board0, 1); // shift one space left
+            let boardf = _mm512_and_si512(answer, board0); // focus on the pattern
+            let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf); // see if it matches the pattern
+            count_match += _popcnt32(temp_mask as i32);
+        }
+    }
+
+    if count_match > 0 {
+        return true;
+    } else {
+        return false;
+    }
+}
+
+#[target_feature(enable = "avx512f,avx512bw")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn check_patternlive4_avx512(pos: &Pos, sd: Side) -> bool {
+    let coloridx = sd as usize;
+    let emptyidx = Color::Empty as usize;
+
+    #[rustfmt::skip]
+    let answer_color = _mm512_set1_epi16(         (1<<14)|(1<<13)|(1<<12)|(1<<11)         );
+    #[rustfmt::skip]
+    let answer_empty = _mm512_set1_epi16( (1<<15)|                                (1<<10) );
+    #[rustfmt::skip]
+    let answer       = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10) );
+
+    #[rustfmt::skip]
+    let answer_mask: [__mmask32; 10] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10,
+                                        0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_10_10,
+                                        0b00_10_11_11_11_11_11_11_11_10_10_10_10_10_11_10,
+                                        0b00_10_10_11_11_11_11_11_10_10_10_10_10_11_11_10,
+                                        0b00_10_10_10_11_11_11_10_10_10_10_10_11_11_11_10,
+                                        0b00_10_10_10_10_11_10_10_10_10_10_11_11_11_11_10];
+    let board0org: [__m512i; 2] = [
+        _mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]),
+        _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0]),
+    ];
+    let board1org: [__m512i; 2] = [
+        _mm512_loadu_epi32(&pos.bitboard[emptyidx][0][0]),
+        _mm512_loadu_epi32(&pos.bitboard[emptyidx][1][0]),
+    ];
+
+    let mut count_match: i32 = 0;
+
+    for dir in 0..2 {
+        let mut board0 = board0org[dir];
+        let mut board1 = board1org[dir];
+
+        let boardf1 = _mm512_and_si512(answer_color, board0);
+        let boardf2 = _mm512_and_si512(answer_empty, board1);
+        let boardf = _mm512_or_si512(boardf1, boardf2);
+
+        let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
+        count_match += _popcnt32(temp_mask as i32);
+
+        for i in 1..10 {
+            board0 = _mm512_slli_epi32(board0, 1);
+            board1 = _mm512_slli_epi32(board1, 1);
+
+            let boardf1 = _mm512_and_si512(answer_color, board0);
+            let boardf2 = _mm512_and_si512(answer_empty, board1);
+            let boardf = _mm512_or_si512(boardf1, boardf2);
+
+            let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf);
+            count_match += _popcnt32(temp_mask as i32);
+        }
+    }
+
+    if count_match > 0 {
+        return true;
+    } else {
+        return false;
+    }
+}
+
+#[target_feature(enable = "avx512f,avx512bw")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn check_patterndead4_avx512(pos: &Pos, sd: Side) -> i32 {
+    let coloridx = sd as usize;
+    let emptyidx = Color::Empty as usize;
+
+    #[rustfmt::skip]
+    let answer_color: [__m512i; 5] = [_mm512_set1_epi16(         (1<<14)|(1<<13)|(1<<12)|(1<<11) ),
+                                      _mm512_set1_epi16( (1<<15)|        (1<<13)|(1<<12)|(1<<11) ),
+                                      _mm512_set1_epi16( (1<<15)|(1<<14)        |(1<<12)|(1<<11) ),
+                                      _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)        |(1<<11) ),
+                                      _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)         )];
+    #[rustfmt::skip]
+    let answer_empty: [__m512i; 5]= [_mm512_set1_epi16( 1<<15 ),
+                                     _mm512_set1_epi16(          1<<14 ),
+                                     _mm512_set1_epi16(                  1<<13 ),
+                                     _mm512_set1_epi16(                          1<<12 ),
+                                     _mm512_set1_epi16(                                   1<<11)];
+    #[rustfmt::skip]
+    let answer       = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11));
+
+    #[rustfmt::skip]
+    let answer_mask: [__mmask32; 11] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10,
+                                        0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_11_10,
+                                        0b00_10_11_11_11_11_11_11_11_10_10_10_10_11_11_10,
+                                        0b00_10_10_11_11_11_11_11_10_10_10_10_11_11_11_10,
+                                        0b00_10_10_10_11_11_11_10_10_10_10_11_11_11_11_10,
+                                        0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10];
+    let board0org: [__m512i; 2] = [
+        _mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]),
+        _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0]),
+    ];
+    let board1org: [__m512i; 2] = [
+        _mm512_loadu_epi32(&pos.bitboard[emptyidx][0][0]),
+        _mm512_loadu_epi32(&pos.bitboard[emptyidx][1][0]),
+    ];
+
+    let mut count_match: i32 = 0;
+
+    for pattern in 0..5 {
+        for dir in 0..2 {
+            let mut board0 = board0org[dir];
+            let mut board1 = board1org[dir];
+
+            let boardf1 = _mm512_and_si512(answer_color[pattern], board0);
+            let boardf2 = _mm512_and_si512(answer_empty[pattern], board1);
+            let boardf = _mm512_or_si512(boardf1, boardf2);
+
+            let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
+            count_match += _popcnt32(temp_mask as i32);
+
+            for i in 1..11 {
+                board0 = _mm512_slli_epi32(board0, 1);
+                board1 = _mm512_slli_epi32(board1, 1);
+
+                let boardf1 = _mm512_and_si512(answer_color[pattern], board0);
+                let boardf2 = _mm512_and_si512(answer_empty[pattern], board1);
+                let boardf = _mm512_or_si512(boardf1, boardf2);
+
+                let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf);
+                count_match += _popcnt32(temp_mask as i32);
+            }
+        }
+    }
+
+    count_match
+}
+
+#[target_feature(enable = "avx512f,avx512bw")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn check_patternlive3_avx512(pos: &Pos, sd: Side) -> i32 {
+    let coloridx = sd as usize;
+    let emptyidx = Color::Empty as usize;
+
+    #[rustfmt::skip]
+    let board0org: [__m512i; 2]  = [_mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]), _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0])];
+    #[rustfmt::skip]
+    let board1org: [__m512i; 2]  = [_mm512_loadu_epi32(&pos.bitboard[emptyidx][0][0]), _mm512_loadu_epi32(&pos.bitboard[emptyidx][1][0])];
+
+    #[rustfmt::skip]
+    let answer_color: [__m512i; 1] = [_mm512_set1_epi16(         (1<<14)|(1<<13)|(1<<12)         )];
+    #[rustfmt::skip]
+    let answer_empty: [__m512i; 1] = [_mm512_set1_epi16( (1<<15)|                        (1<<11) )];
+    #[rustfmt::skip]
+    let answer: __m512i = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11) );
+
+    let mut count_match: i32 = 0;
+
+    #[rustfmt::skip]
+    let answer_mask: [__mmask32; 11] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10,
+                                        0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_11_10,
+                                        0b00_10_11_11_11_11_11_11_11_10_10_10_10_11_11_10,
+                                        0b00_10_10_11_11_11_11_11_10_10_10_10_11_11_11_10,
+                                        0b00_10_10_10_11_11_11_10_10_10_10_11_11_11_11_10,
+                                        0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10];
+    for pattern in 0..1 {
+        for dir in 0..2 {
+            let mut board0 = board0org[dir];
+            let mut board1 = board1org[dir];
+
+            let boardf1 = _mm512_and_si512(answer_color[pattern], board0);
+            let boardf2 = _mm512_and_si512(answer_empty[pattern], board1);
+            let boardf = _mm512_or_si512(boardf1, boardf2);
+
+            let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
+            count_match += _popcnt32(temp_mask as i32);
+
+            for i in 1..11 {
+                board0 = _mm512_slli_epi32(board0, 1);
+                board1 = _mm512_slli_epi32(board1, 1);
+
+                let boardf1 = _mm512_and_si512(answer_color[pattern], board0);
+                let boardf2 = _mm512_and_si512(answer_empty[pattern], board1);
+                let boardf = _mm512_or_si512(boardf1, boardf2);
+
+                let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf);
+                count_match += _popcnt32(temp_mask as i32);
+            }
+        }
+    }
+
+    #[rustfmt::skip]
+    let answer_color: [__m512i; 2] = [_mm512_set1_epi16(          (1<<14)|        (1<<12)|(1<<11) ),
+                                      _mm512_set1_epi16(          (1<<14)|(1<<13)        |(1<<11) )];
+    #[rustfmt::skip]
+    let answer_empty: [__m512i; 2] = [_mm512_set1_epi16( (1<<15)|         (1<<13)|                (1<<10) ),
+                                      _mm512_set1_epi16( (1<<15)|                 (1<<12)|        (1<<10) )];
+    #[rustfmt::skip]
+    let answer: __m512i = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10) );
+
+    #[rustfmt::skip]
+    let answer_mask: [__mmask32; 10] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10,
+                                        0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10,
+                                        0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_10_10,
+                                        0b00_10_11_11_11_11_11_11_11_10_10_10_10_10_11_10,
+                                        0b00_10_10_11_11_11_11_11_10_10_10_10_10_11_11_10,
+                                        0b00_10_10_10_11_11_11_10_10_10_10_10_11_11_11_10,
+                                        0b00_10_10_10_10_11_10_10_10_10_10_11_11_11_11_10];
+    for pattern in 0..2 {
+        for dir in 0..2 {
+            let mut board0 = board0org[dir];
+            let mut board1 = board1org[dir];
+
+            let boardf1 = _mm512_and_si512(answer_color[pattern], board0);
+            let boardf2 = _mm512_and_si512(answer_empty[pattern], board1);
+            let boardf = _mm512_or_si512(boardf1, boardf2);
+
+            let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
+            count_match += _popcnt32(temp_mask as i32);
+
+            for i in 1..10 {
+                board0 = _mm512_slli_epi32(board0, 1);
+                board1 = _mm512_slli_epi32(board1, 1);
+
+                let boardf1 = _mm512_and_si512(answer_color[pattern], board0);
+                let boardf2 = _mm512_and_si512(answer_empty[pattern], board1);
+                let boardf = _mm512_or_si512(boardf1, boardf2);
+
+                let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf);
+                count_match += _popcnt32(temp_mask as i32);
+            }
+        }
+    }
+
+    count_match
+}
+
+fn main() {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx512bw") {
+            println!("\n\nThe program is running with avx512f and avx512bw intrinsics\n\n");
+        } else {
+            println!("\n\nThe program is running with NO intrinsics.\n\n");
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        println!("\n\nThe program is running with NO intrinsics.\n\n");
+    }
+
+    loop {
+        let start = Instant::now();
+
+        println!("Hello, this is Connect5 (Outer-Open Gomoku)!");
+        println!("Self-playing with search depth = 4");
+
+        let test_state: [Color; SQUARE_SIZE as usize] = [Color::Empty; SQUARE_SIZE as usize];
+        let test_bitboard: [[[i32; 16]; 2]; 3] = [[[0; 16]; 2]; 3];
+
+        let mut test1 = Pos {
+            state: test_state,
+            p_turn: Color::Black,
+            bitboard: test_bitboard,
+        };
+
+        test1.init();
+
+        let mut count: i32 = 0;
+
+        for i in 0..(FILE_SIZE * RANK_SIZE) {
+            let mut next_move: Move = square_make(1, 7); // set the first move is (1,7)
+
+            if i > 0 {
+                next_move = search(&test1, -EVAL_INF, EVAL_INF, 4, 0);
+            } // search depth = 4
+
+            test1.do_move(next_move);
+            pos_disp(&test1);
+
+            if pos_is_end(&test1) {
+                println!("Game over!!!!!! at Move {}", i);
+                count = i + 1;
+                break;
+            }
+        }
+
+        let duration = start.elapsed();
+
+        println!(
+            "Average time for each move is: {:?}",
+            duration / count as u32
+        );
+    }
+}
diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs
new file mode 100644
index 000000000..812836d66
--- /dev/null
+++ b/library/stdarch/examples/hex.rs
@@ -0,0 +1,401 @@
+//! An example showing runtime dispatch to an architecture-optimized
+//! implementation.
+//!
+//! This program implements hex encoding a slice into a predetermined
+//! destination using various different instruction sets. This selects at
+//! runtime the most optimized implementation and uses that rather than being
+//! required to be compiled differently.
+//!
+//! You can test out this program via:
+//!
+//!     echo test | cargo +nightly run --release hex
+//!
+//! and you should see `746573740a` get printed out.
+
+#![feature(stdsimd, wasm_target_feature)]
+#![cfg_attr(test, feature(test))]
+#![allow(
+    clippy::unwrap_used,
+    clippy::print_stdout,
+    clippy::unwrap_used,
+    clippy::shadow_reuse,
+    clippy::cast_possible_wrap,
+    clippy::cast_ptr_alignment,
+    clippy::cast_sign_loss,
+    clippy::missing_docs_in_private_items
+)]
+
+use std::{
+    io::{self, Read},
+    str,
+};
+
+#[cfg(target_arch = "x86")]
+use {core_arch::arch::x86::*, std_detect::is_x86_feature_detected};
+#[cfg(target_arch = "x86_64")]
+use {core_arch::arch::x86_64::*, std_detect::is_x86_feature_detected};
+
+fn main() {
+    let mut input = Vec::new();
+    io::stdin().read_to_end(&mut input).unwrap();
+    let mut dst = vec![0; 2 * input.len()];
+    let s = hex_encode(&input, &mut dst).unwrap();
+    println!("{}", s);
+}
+
+fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
+    let len = src.len().checked_mul(2).unwrap();
+    if dst.len() < len {
+        return Err(len);
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx2") {
+            return unsafe { hex_encode_avx2(src, dst) };
+        }
+        if is_x86_feature_detected!("sse4.1") {
+            return unsafe { hex_encode_sse41(src, dst) };
+        }
+    }
+    #[cfg(target_arch = "wasm32")]
+    {
+        if true {
+            return unsafe { hex_encode_simd128(src, dst) };
+        }
+    }
+
+    hex_encode_fallback(src, dst)
+}
+
+#[target_feature(enable = "avx2")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
+    let ascii_zero = _mm256_set1_epi8(b'0' as i8);
+    let nines = _mm256_set1_epi8(9);
+    let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
+    let and4bits = _mm256_set1_epi8(0xf);
+
+    let mut i = 0_isize;
+    while src.len() >= 32 {
+        let invec = _mm256_loadu_si256(src.as_ptr() as *const _);
+
+        let masked1 = _mm256_and_si256(invec, and4bits);
+        let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);
+
+        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
+        let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
+        let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);
+
+        // add '0' or the offset depending on the masks
+        let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
+        let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
+
+        // interleave masked1 and masked2 bytes
+        let res1 = _mm256_unpacklo_epi8(masked2, masked1);
+        let res2 = _mm256_unpackhi_epi8(masked2, masked1);
+
+        // Store everything into the right destination now
+        let base = dst.as_mut_ptr().offset(i * 2);
+        let base1 = base.offset(0) as *mut _;
+        let base2 = base.offset(16) as *mut _;
+        let base3 = base.offset(32) as *mut _;
+        let base4 = base.offset(48) as *mut _;
+        _mm256_storeu2_m128i(base3, base1, res1);
+        _mm256_storeu2_m128i(base4, base2, res2);
+        src = &src[32..];
+        i += 32;
+    }
+
+    let i = i as usize;
+    let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
+
+    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
+}
+
+// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
+#[target_feature(enable = "sse4.1")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
+    let ascii_zero = _mm_set1_epi8(b'0' as i8);
+    let nines = _mm_set1_epi8(9);
+    let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
+    let and4bits = _mm_set1_epi8(0xf);
+
+    let mut i = 0_isize;
+    while src.len() >= 16 {
+        let invec = _mm_loadu_si128(src.as_ptr() as *const _);
+
+        let masked1 = _mm_and_si128(invec, and4bits);
+        let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
+
+        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
+        let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
+        let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
+
+        // add '0' or the offset depending on the masks
+        let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
+        let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
+
+        // interleave masked1 and masked2 bytes
+        let res1 = _mm_unpacklo_epi8(masked2, masked1);
+        let res2 = _mm_unpackhi_epi8(masked2, masked1);
+
+        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
+        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
+        src = &src[16..];
+        i += 16;
+    }
+
+    let i = i as usize;
+    let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
+
+    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
+}
+
+#[cfg(target_arch = "wasm32")]
+#[target_feature(enable = "simd128")]
+unsafe fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
+    use core_arch::arch::wasm32::*;
+
+    let ascii_zero = u8x16_splat(b'0');
+    let nines = u8x16_splat(9);
+    let ascii_a = u8x16_splat(b'a' - 9 - 1);
+    let and4bits = u8x16_splat(0xf);
+
+    let mut i = 0_isize;
+    while src.len() >= 16 {
+        let invec = v128_load(src.as_ptr() as *const _);
+
+        let masked1 = v128_and(invec, and4bits);
+        let masked2 = v128_and(u8x16_shr(invec, 4), and4bits);
+
+        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
+        let cmpmask1 = u8x16_gt(masked1, nines);
+        let cmpmask2 = u8x16_gt(masked2, nines);
+
+        // add '0' or the offset depending on the masks
+        let masked1 = u8x16_add(masked1, v128_bitselect(ascii_a, ascii_zero, cmpmask1));
+        let masked2 = u8x16_add(masked2, v128_bitselect(ascii_a, ascii_zero, cmpmask2));
+
+        // Next we need to shuffle around masked{1,2} to get back to the
+        // original source text order. The first element (res1) we'll store uses
+        // all the low bytes from the 2 masks and the second element (res2) uses
+        // all the upper bytes.
+        let res1 = u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(
+            masked2, masked1,
+        );
+        let res2 = u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(
+            masked2, masked1,
+        );
+
+        v128_store(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
+        v128_store(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
+        src = &src[16..];
+        i += 16;
+    }
+
+    let i = i as usize;
+    let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
+
+    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
+}
+
+fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
+    fn hex(byte: u8) -> u8 {
+        static TABLE: &[u8] = b"0123456789abcdef";
+        TABLE[byte as usize]
+    }
+
+    for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
+        slots[0] = hex((*byte >> 4) & 0xf);
+        slots[1] = hex(*byte & 0xf);
+    }
+
+    unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2])) }
+}
+
+// Run these with `cargo +nightly test --example hex -p stdarch`
+#[cfg(test)]
+mod tests {
+    use std::iter;
+
+    use super::*;
+
+    fn test(input: &[u8], output: &str) {
+        let tmp = || vec![0; input.len() * 2];
+
+        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
+        assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);
+
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        unsafe {
+            if self::is_x86_feature_detected!("avx2") {
+                assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output);
+            }
+            if self::is_x86_feature_detected!("sse4.1") {
+                assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output);
+            }
+        }
+    }
+
+    #[test]
+    fn empty() {
+        test(b"", "");
+    }
+
+    #[test]
+    fn big() {
+        test(
+            &[0; 1024],
+            &iter::repeat('0').take(2048).collect::<String>(),
+        );
+    }
+
+    #[test]
+    fn odd() {
+        test(
+            &[0; 313],
+            &iter::repeat('0').take(313 * 2).collect::<String>(),
+        );
+    }
+
+    #[test]
+    fn avx_works() {
+        let mut input = [0; 33];
+        input[4] = 3;
+        input[16] = 3;
+        input[17] = 0x30;
+        input[21] = 1;
+        input[31] = 0x24;
+        test(
+            &input,
+            "\
+             0000000003000000\
+             0000000000000000\
+             0330000000010000\
+             0000000000000024\
+             00\
+             ",
+        );
+    }
+
+    quickcheck::quickcheck! {
+        fn encode_equals_fallback(input: Vec<u8>) -> bool {
+            let mut space1 = vec![0; input.len() * 2];
+            let mut space2 = vec![0; input.len() * 2];
+            let a = hex_encode(&input, &mut space1).unwrap();
+            let b = hex_encode_fallback(&input, &mut space2).unwrap();
+            a == b
+        }
+
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        fn avx_equals_fallback(input: Vec<u8>) -> bool {
+            if !self::is_x86_feature_detected!("avx2") {
+                return true
+            }
+            let mut space1 = vec![0; input.len() * 2];
+            let mut space2 = vec![0; input.len() * 2];
+            let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() };
+            let b = hex_encode_fallback(&input, &mut space2).unwrap();
+            a == b
+        }
+
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        fn sse41_equals_fallback(input: Vec<u8>) -> bool {
+            if !self::is_x86_feature_detected!("avx2") {
+                return true
+            }
+            let mut space1 = vec![0; input.len() * 2];
+            let mut space2 = vec![0; input.len() * 2];
+            let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() };
+            let b = hex_encode_fallback(&input, &mut space2).unwrap();
+            a == b
+        }
+    }
+}
+
+// Run these with `cargo +nightly bench --example hex -p stdarch`
+#[cfg(test)]
+mod benches {
+    extern crate rand;
+    extern crate test;
+
+    use self::rand::Rng;
+
+    use super::*;
+
+    const SMALL_LEN: usize = 117;
+    const LARGE_LEN: usize = 1 * 1024 * 1024;
+
+    fn doit(
+        b: &mut test::Bencher,
+        len: usize,
+        f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
+    ) {
+        let mut rng = rand::thread_rng();
+        let input = std::iter::repeat(())
+            .map(|()| rng.gen::<u8>())
+            .take(len)
+            .collect::<Vec<_>>();
+        let mut dst = vec![0; input.len() * 2];
+        b.bytes = len as u64;
+        b.iter(|| unsafe {
+            f(&input, &mut dst).unwrap();
+            dst[0]
+        });
+    }
+
+    #[bench]
+    fn small_default(b: &mut test::Bencher) {
+        doit(b, SMALL_LEN, hex_encode);
+    }
+
+    #[bench]
+    fn small_fallback(b: &mut test::Bencher) {
+        doit(b, SMALL_LEN, hex_encode_fallback);
+    }
+
+    #[bench]
+    fn large_default(b: &mut test::Bencher) {
+        doit(b, LARGE_LEN, hex_encode);
+    }
+
+    #[bench]
+    fn large_fallback(b: &mut test::Bencher) {
+        doit(b, LARGE_LEN, hex_encode_fallback);
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    mod x86 {
+        use super::*;
+
+        #[bench]
+        fn small_avx2(b: &mut test::Bencher) {
+            if self::is_x86_feature_detected!("avx2") {
+                doit(b, SMALL_LEN, hex_encode_avx2);
+            }
+        }
+
+        #[bench]
+        fn small_sse41(b: &mut test::Bencher) {
+            if self::is_x86_feature_detected!("sse4.1") {
+                doit(b, SMALL_LEN, hex_encode_sse41);
+            }
+        }
+
+        #[bench]
+        fn large_avx2(b: &mut test::Bencher) {
+            if self::is_x86_feature_detected!("avx2") {
+                doit(b, LARGE_LEN, hex_encode_avx2);
+            }
+        }
+
+        #[bench]
+        fn large_sse41(b: &mut test::Bencher) {
+            if self::is_x86_feature_detected!("sse4.1") {
+                doit(b, LARGE_LEN, hex_encode_sse41);
+            }
+        }
+    }
+}
diff --git a/library/stdarch/examples/wasm.rs b/library/stdarch/examples/wasm.rs
new file mode 100644
index 000000000..6b92ae9b8
--- /dev/null
+++ b/library/stdarch/examples/wasm.rs
@@ -0,0 +1,45 @@
+//! A simple slab allocator for pages in wasm
+
+#![feature(stdsimd)]
+#![cfg(target_arch = "wasm32")]
+
+use std::ptr;
+
+use core_arch::arch::wasm32::*;
+
+static mut HEAD: *mut *mut u8 = 0 as _;
+
+#[no_mangle]
+pub unsafe extern "C" fn page_alloc() -> *mut u8 {
+    if !HEAD.is_null() {
+        let next = *HEAD;
+        let ret = HEAD;
+        HEAD = next as *mut _;
+        return ret as *mut u8;
+    }
+
+    let ret = memory_grow(0, 1);
+
+    // if we failed to allocate a page then return null
+    if ret == usize::MAX {
+        return ptr::null_mut();
+    }
+
+    ((ret as u32) * page_size()) as *mut u8
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn page_free(page: *mut u8) {
+    let page = page as *mut *mut u8;
+    *page = HEAD as *mut u8;
+    HEAD = page;
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn memory_used() -> usize {
+    (page_size() * (memory_size(0) as u32)) as usize
+}
+
+fn page_size() -> u32 {
+    64 * 1024
+}